Whamcloud - gitweb
Landing b1_4_quotaoff to b1_4
authorgreen <green>
Fri, 1 Apr 2005 21:32:29 +0000 (21:32 +0000)
committergreen <green>
Fri, 1 Apr 2005 21:32:29 +0000 (21:32 +0000)
78 files changed:
ldiskfs/ldiskfs/Makefile.in
ldiskfs/ldiskfs/autoMakefile.am
lustre/doc/quota_hld.lyx [new file with mode: 0644]
lustre/include/liblustre.h
lustre/include/linux/Makefile.am
lustre/include/linux/lustre_fsfilt.h
lustre/include/linux/lustre_idl.h
lustre/include/linux/lustre_lib.h
lustre/include/linux/lustre_log.h
lustre/include/linux/lustre_quota.h [new file with mode: 0644]
lustre/include/linux/obd.h
lustre/include/linux/obd_class.h
lustre/include/linux/obd_support.h
lustre/include/lustre/liblustreapi.h
lustre/include/lustre/lustre_user.h
lustre/kernel_patches/patches/kallsyms-2.4-bgl.patch
lustre/ldiskfs/Makefile.in
lustre/ldiskfs/autoMakefile.am
lustre/ldlm/ldlm_lib.c
lustre/ldlm/ldlm_lockd.c
lustre/llite/dir.c
lustre/llite/llite_internal.h
lustre/llite/rw.c
lustre/lov/lov_log.c
lustre/lov/lov_obd.c
lustre/lvfs/Makefile.in
lustre/lvfs/autoMakefile.am
lustre/lvfs/fsfilt_ext3.c
lustre/lvfs/lvfs_linux.c
lustre/lvfs/quotacheck_test.c [new file with mode: 0644]
lustre/lvfs/quotactl_test.c [new file with mode: 0644]
lustre/mdc/mdc_locks.c
lustre/mdc/mdc_request.c
lustre/mds/Makefile.in
lustre/mds/autoMakefile.am
lustre/mds/handler.c
lustre/mds/lproc_mds.c
lustre/mds/mds_fs.c
lustre/mds/mds_internal.h
lustre/mds/mds_log.c
lustre/mds/mds_lov.c
lustre/mds/mds_open.c
lustre/mds/mds_reint.c
lustre/mds/quota_context.c [new file with mode: 0644]
lustre/mds/quota_master.c [new file with mode: 0644]
lustre/obdclass/class_obd.c
lustre/obdclass/genops.c
lustre/obdclass/llog_ioctl.c
lustre/obdclass/llog_lvfs.c
lustre/obdclass/llog_obd.c
lustre/obdclass/llog_swab.c
lustre/obdclass/lprocfs_status.c
lustre/obdfilter/Makefile.in
lustre/obdfilter/filter.c
lustre/obdfilter/filter_internal.h
lustre/obdfilter/filter_io.c
lustre/obdfilter/filter_io_26.c
lustre/obdfilter/filter_log.c
lustre/obdfilter/lproc_obdfilter.c
lustre/osc/Makefile.in
lustre/osc/osc_internal.h
lustre/osc/osc_quota.c [new file with mode: 0644]
lustre/osc/osc_request.c
lustre/ost/ost_handler.c
lustre/ptlrpc/llog_net.c
lustre/ptlrpc/lproc_ptlrpc.c
lustre/ptlrpc/pack_generic.c
lustre/ptlrpc/ptlrpc_module.c
lustre/tests/lov.sh
lustre/tests/quota_sanity.sh [new file with mode: 0644]
lustre/tests/replay-single.sh
lustre/tests/run-quotacheck.sh [new file with mode: 0644]
lustre/tests/run-quotactl.sh [new file with mode: 0644]
lustre/tests/run-quotafmt.sh [new file with mode: 0644]
lustre/utils/lfs.c
lustre/utils/liblustreapi.c
lustre/utils/wirecheck.c
lustre/utils/wiretest.c

index e2aff01..be51da2 100644 (file)
@@ -1,6 +1,6 @@
 default: all
 
-MODULES := ldiskfs
+MODULES := ldiskfs quotafmt_test
 
 # copy makefile over to not break patches
 ext3_extra := $(wildcard @LINUX@/fs/ext3/Makefile)
@@ -11,9 +11,13 @@ linux_headers := $(wildcard @LINUX@/include/linux/ext3*.h)
 ext3_sources := $(filter-out %.mod.c,$(wildcard @LINUX@/fs/ext3/*.c))
 new_sources := iopen.c iopen.h extents.c mballoc.c
 new_headers := ext3_extents.h
-ldiskfs_sources := $(notdir $(ext3_sources) $(ext3_headers)) $(new_sources) $(new_headers)
+quotafmt_sources := lustre_quota_fmt.c
+quotafmt_headers := lustre_quota_fmt.h
+ldiskfs_patched_sources := $(notdir $(ext3_sources) $(ext3_headers)) $(new_sources) $(new_headers)
+ldiskfs_sources := $(ldiskfs_patched_sources) $(quotafmt_sources) $(quotafmt_headers)
 
 ldiskfs-objs := $(filter %.o,$(ldiskfs_sources:.c=.o))
+quotafmt-objs := quotafmt_test.o
 
 EXTRA_PRE_CFLAGS := -I@LINUX@/fs -I@LUSTRE@ -I@LUSTRE@/ldiskfs
 
index a31e01a..4f9e784 100644 (file)
@@ -6,7 +6,7 @@ endif
 
 ldiskfs_linux_headers := $(addprefix linux/,$(subst ext3,ldiskfs,$(notdir $(linux_headers))))
 
-$(filter %.c,$(ldiskfs_sources)): sources $(ldiskfs_linux_headers) $(filter %.h,$(ldiskfs_sources))
+$(filter %.c,$(ldiskfs_patched_sources)): sources $(ldiskfs_linux_headers) $(filter %.h,$(ldiskfs_patched_sources))
 
 ldiskfs_sed_flags = \
        -e "s/dx_hash_info/ext3_dx_hash_info/g" \
@@ -71,7 +71,9 @@ foo-check:
        @echo "ldiskfs_LDADD: $(ldiskfs_LDADD)"
 
 MOSTLYCLEANFILES = *.o *.ko *.mod.c
-CLEANFILES = sources *.c *.h
+CLEANFILES = sources $(notdir $(linux_headers) $(ext3_headers) $(ext3_sources) $(new_sources) $(new_headers))
+
+EXTRA_DIST := lustre_quota_fmt.c lustre_quota_fmt.h quotafmt_test.c
 
 clean: clean-am
        rm -rf linux linux-stage
diff --git a/lustre/doc/quota_hld.lyx b/lustre/doc/quota_hld.lyx
new file mode 100644 (file)
index 0000000..86d1096
--- /dev/null
@@ -0,0 +1,1231 @@
+#LyX 1.3 created this file. For more info see http://www.lyx.org/
+\lyxformat 221
+\textclass article
+\language english
+\inputencoding auto
+\fontscheme times
+\graphics default
+\paperfontsize default
+\spacing single 
+\papersize Default
+\paperpackage a4
+\use_geometry 0
+\use_amsmath 0
+\use_natbib 0
+\use_numerical_citations 0
+\paperorientation portrait
+\secnumdepth 3
+\tocdepth 3
+\paragraph_separation skip
+\defskip medskip
+\quotes_language english
+\quotes_times 2
+\papercolumns 1
+\papersides 1
+\paperpagestyle default
+
+\layout Title
+
+Quota For Lustre
+\layout Section
+
+From Engineering Requirements Specification
+\layout Enumerate
+
+Lustre can operate and enforce disk block quota and file quota.
+\layout Enumerate
+
+Hard and soft quota are supported
+\layout Enumerate
+
+Central management tools enable setting limits for users and initializing
+ quota check operations
+\layout Enumerate
+
+Quota are only needed for Linux 2.6
+\layout Section
+
+Specification of subsystems
+\layout Description
+
+Definition: An 
+\emph on 
+operational quota file
+\emph default 
+ is a quota database containing limits for some uid's and gid's which is
+ being used to enforce quota.
+ An 
+\emph on 
+administrative quota file
+\emph default 
+ is a similar database, but it is used for recovery and soft quota or administra
+tive purposes.
+\layout Subsection
+
+Master & slaves
+\layout Standard
+
+A node is a master 
+\series bold 
+for a uid or gid 
+\series default 
+if the node holds the cluster wide limits (hard, soft, files, blocks & gracetime
+s) for that uid or gid in an administrative quota file.
+ The administrative quota file is similar to normal ext3 quota file.
+ The data structures and code for an administrative quota file API will
+ be copied from the Linux VFS to ldiskfs and amended.
+ Slave nodes (all other servers) only consider hard quota and only have
+ operational quota files.
+\layout Standard
+
+Note that a node may be a master for some uid's, gid's and a slave for others.
+ Masters also have an operational quota file for enforcing hard quota .
+ Master 
+\series bold 
+observe soft limits in the administrative file, based on grace times
+\series default 
+.
+\layout Subsection
+
+Acquire / release protocol
+\layout Standard
+
+The master administrative quota file has two kinds of limits: total limits
+ and limit acquired by all servers (administrative usage).
+ Total limits are set by user, administrative usage is initialized to zero
+ and it's amended when master/slaves acquire or release quota.
+\layout Standard
+
+Quota slaves can acquire from the master and release to the master qunits
+ of disk space (>100MB typically, see ERS).
+ Slaves do this to increase / lower their hard limits of operational file.
+ Upon acquiring quota from a master the master's administrative usage are
+ increased.
+ Master can acquire/release qunits, just like slaves, except that it is
+ done locally.
+\layout Standard
+
+On the master only, soft limits are enforced in obd layer based on the administr
+ative quota file.
+ Once administrative usage >= administrative soft limit, the timer is activated.
+\layout Subsection
+
+Chown Operations
+\layout Standard
+
+All objects associated with a file will have their owners set to that of
+ the MDS inode.
+ These chown operations occur in connection with file creation and chowning
+ on the MDS and are asynchronous.
+ There will also be enough space in the records to set an EA on the objects
+ indicating the originating MDS, fileset and storage id of the inode.
+ The arguments will contain the following - but the final format of the
+ packet sent is subject to approval by management (it may be larger):
+\layout LyX-Code
+
+struct object_setattr_args {
+\layout LyX-Code
+
+      __u64 osa_mds_id;     /* to identify MDS */
+\layout LyX-Code
+
+      __u64 osa_fileset_id; /* part of the fid, tbd */
+\layout LyX-Code
+
+      __u64 osa_ino;    /* inode number on mds */
+\layout LyX-Code
+
+      __u64 osa_gen;    /* inode generation on mds */
+\layout LyX-Code
+
+      __u32 osa_uid;    /* owner of the file */
+\layout LyX-Code
+
+      __u32 osa_gid;    /* group of the file */
+\layout LyX-Code
+
+      __u64 osa_mds_transno;/* for recovery of mds rollback */
+\layout LyX-Code
+
+      __u64 osa_mds_last_committed;
+\layout LyX-Code
+
+      __u32 osa_mds_prev_uid; /* to undo things that didn't complete on
+ the MDS */
+\layout LyX-Code
+
+      __u32 osa_mds_prev_gid;
+\layout LyX-Code
+
+} 
+\layout Subsection
+
+Recovery
+\layout Standard
+
+A recovery protocol for limits involves
+\layout Description
+
+Master\SpecialChar ~
+recovery re-writing the operational limits on the master node, based
+ on the cluster-wide limits as found in the administrative quota file 
+\layout Description
+
+Slave\SpecialChar ~
+recovery completing aborted release operations on slaves.
+\layout Standard
+
+Chown operations for objects will use llog recovery on the MDS (as it is
+ used for unlinks).
+\layout Standard
+
+MDS chown operations that are lost are not recovered at this point - but
+ arguments to do so in the future are passed as above.
+ The recovery from this is fairly simple: the OST writes log operations
+ for each chown operation containing the MDS transaction number and undo
+ information.
+ The MDS reports last committed transactions to the OST.
+ During normal use these lead to cancellations of records leading up to
+ that transaction.
+ During recovery, all llog records following the record containing the transacti
+on number will be used to undo the OST chown/chgrp operations.
+\layout Standard
+
+For new files, removal of objects does already take place.
+\layout Subsection
+
+Configuration
+\layout Standard
+
+A configuration protocol will initiate quota check operations, turn quota
+ on, and set limits.
+ All commands will be issued through lfs.
+\layout Subsection
+
+Disk fs handling
+\layout Standard
+
+Disk file systems track quota usage.
+ An interface between OSS and MDS and disk file systems will enable a check
+ and adjustment of disk file system quota limits before operations proceed.
+ Every node will try to acquire quota before proceeding.
+ Every node will release quota after finishing.
+ Acquire and release calls are tuned to anticipate use.
+ Disk fs quota check handling will be possible on busy file systems.
+\layout Section
+
+Use cases
+\layout Standard
+
+Each use case is an interaction between a 
+\begin_inset Quotes eld
+\end_inset 
+
+user
+\begin_inset Quotes erd
+\end_inset 
+
+ and 
+\begin_inset Quotes eld
+\end_inset 
+
+system
+\begin_inset Quotes erd
+\end_inset 
+
+.
+ For each use case we describe what subsystem forms the 
+\begin_inset Quotes eld
+\end_inset 
+
+user
+\begin_inset Quotes erd
+\end_inset 
+
+ and the 
+\begin_inset Quotes eld
+\end_inset 
+
+system
+\begin_inset Quotes erd
+\end_inset 
+
+.
+ Use the logical components indicated in sections 3.1-3.4 below to describe
+ the use cases.
+ The purpose is to check that each of the use cases at a high level appears
+ to execute successfully by using the components listed under 3.1-3.4.
+ In some of the scenarios (e.g.
+ 3.2 multiple use scenarios should be described, e.g.
+ how is the slave-master protocol involved and how is the client - oss protocol
+ involved).
+\layout Subsection
+
+Initialization operation
+\layout Subsubsection
+
+Changing owners 
+\layout Standard
+
+The following operations are done on a client:
+\layout List
+\labelwidthstring 00.00.0000
+
+Administrator get root privileges on the file system
+\layout List
+\labelwidthstring 00.00.0000
+
+Administrator run `find <mnt> -type f | xargs lchog`
+\begin_deeper 
+\layout Enumerate
+
+<mnt> is mount point
+\layout Enumerate
+
+
+\emph on 
+lchog
+\emph default 
+ is a small utility to do chown/chgrp, its usage: 
+\begin_deeper 
+\layout Standard
+
+
+\emph on 
+lchog [-i] FILE...
+
+\emph default 
+\layout Description
+
+
+\emph on 
+-i
+\emph default 
+ ignore ENOENT error
+\end_deeper 
+\end_deeper 
+\layout List
+\labelwidthstring 00.00.0000
+
+System 
+\emph on 
+lchog
+\emph default 
+ will abort if change failed, and then report error, indicating what was
+ searched etc.
+ Generally user cannot ignore the error, and should fix it and redo the
+ above before the next operation, except that user can set 
+\emph on 
+-i
+\emph default 
+ option for 
+\emph on 
+lchog
+\emph default 
+ to ignore ENOENT error.
+\layout Subsubsection
+
+Mounting existing file systems with quota support
+\layout List
+\labelwidthstring 00.00.0000
+
+Administrator file systems on all server nodes should be mounted with quota
+ support, this can be done by running
+\emph on 
+ lconf
+\emph default 
+ on all nodes: 
+\emph on 
+lconf --mountfsoptions quota ...,
+\emph default 
+ if the file system has already been mounted, it should be umounted first.
+\layout List
+\labelwidthstring 00.00.0000
+
+System all needed modules are loaded, and file systems are mounted with
+ quota support.
+\layout List
+\labelwidthstring 00.00.0000
+
+Administrator run `lfs quotacheck`, it will initiate quota check on all
+ MDS' and OSTs one by one.
+\layout List
+\labelwidthstring 00.00.0000
+
+System on each node ``quotacheck'' will walk through the diskfs.
+ When the check finishes, it will report the check status to the initiator.
+ If it failed, the error is listed.
+\layout List
+\labelwidthstring 00.00.0000
+
+Administrator user should fix the errors and recheck the specified nodes
+ before proceeding to the next step.
+\layout List
+\labelwidthstring 00.00.0000
+
+Administrator run `lfs quotaon`, it will initiate quotaon on all MDS' and
+ OSTs one by one.
+\layout List
+\labelwidthstring 00.00.0000
+
+System each node will start to check/handle quota.
+ The status will be reported back to the initiator.
+\layout List
+\labelwidthstring 00.00.0000
+
+Administrator user should fix the errors if there are any.
+\layout List
+\labelwidthstring 00.00.0000
+
+Administrator run `lfs setquota`, it will set limits on the corresponding
+ MDS master for the specified uid/gid.
+\layout List
+\labelwidthstring 00.00.0000
+
+System if it's the first time to set limits, master will initialize quota
+ on all slaves, otherwise only modify the quota of itself.
+ Moreover, the limit info is saved in recovery quota file on master.
+
+\series bold 
+\series default 
+The status will be reported to initiator.
+\layout List
+\labelwidthstring 00.00.0000
+
+Administrator if some nodes failed, generally user should not ignore the
+ errors.
+\layout Subsubsection
+
+a new file system to a state where it is using quota
+\layout Standard
+
+Like above, but only need three steps: `lfs quotacheck`, `lfs quotaon` and
+ `lfs setquota`.
+\layout Subsection
+
+Normal use block quota
+\layout Standard
+
+Demonstrate how quota are acquired and released during normal use through
+ sequences of the API's and network calls defined in this document.
+\layout Standard
+
+
+\series bold 
+DESCRIBE CASES WHERE
+\layout Enumerate
+
+A USER DOES THIS OR THAT: WHAT are the system responses
+\layout Enumerate
+
+The client does this or that: what are the OSS & MDS responses
+\layout Enumerate
+
+The OST does this or that, what are the obdfilter / diskfs responses
+\layout Subsubsection
+
+Acquire quota
+\layout List
+\labelwidthstring 00.00.0000
+
+User issues file write operation.
+\layout List
+\labelwidthstring 00.00.0000
+
+System performs write successfully and returns the written bytes.
+\newline 
+
+\layout List
+\labelwidthstring 00.00.0000
+
+Client makes IO requests to OSS.
+\layout List
+\labelwidthstring 00.00.0000
+
+OSS acquires qunit if needed.
+\layout List
+\labelwidthstring 00.00.0000
+
+Master increase usage in administrative file then reply to OSS with granted
+ qunit.
+\layout List
+\labelwidthstring 00.00.0000
+
+OSS updates local operational quota file, performs write operation and replies
+ client the ~noquota flag.
+\newline 
+
+\layout List
+\labelwidthstring 00.00.0000
+
+OST calls obd_commitrw to commit write.
+\layout List
+\labelwidthstring 00.00.0000
+
+Obdfilter if not enough qunit, acquire qunit by dqacq rpc from master, updates
+ local operational quota file after dqacq reply, then performs normal direct
+ write.
+\layout Subsubsection
+
+
+\begin_inset LatexCommand \label{release-quota}
+
+\end_inset 
+
+Release quota
+\layout List
+\labelwidthstring 00.00.0000
+
+User issues truncate or unlink operation.
+\layout List
+\labelwidthstring 00.00.0000
+
+System performs the truncate/unlink operation and returns error code.
+\newline 
+
+\layout List
+\labelwidthstring 00.00.0000
+
+Client makes OST_PUNCH or OST_DESTROY requests to OSS.
+\layout List
+\labelwidthstring 00.00.0000
+
+OSS performs truncate/unlink on objects.
+ release qunit to Master if needed.
+\layout List
+\labelwidthstring 00.00.0000
+
+Master decrease usage in administrative file and reply to OSS.
+\layout List
+\labelwidthstring 00.00.0000
+
+OSS updates local operational quota file.
+\newline 
+
+\layout List
+\labelwidthstring 00.00.0000
+
+OST calls obd_destroy/obd_punch.
+\layout List
+\labelwidthstring 00.00.0000
+
+Obdfilter performs unlink/truncate on objects, if there is qunit to be released,
+ release qunit by dqrel rpc to master then updates local operational quota
+ file.
+\layout Subsection
+
+Running out of block quota
+\layout List
+\labelwidthstring 00.00.0000
+
+User issues file write operation.
+\layout List
+\labelwidthstring 00.00.0000
+
+System write fails and return EDQUOT.
+ (but the pages in cache will be written successfully)
+\newline 
+
+\layout List
+\labelwidthstring 00.00.0000
+
+Client makes IO requests to OSS.
+\layout List
+\labelwidthstring 00.00.0000
+
+OSS acquires qunit from master.
+\layout List
+\labelwidthstring 00.00.0000
+
+Master reply noquota to OSS.
+\layout List
+\labelwidthstring 00.00.0000
+
+OSS fs write fails, rewrites pages from client cache forcibly, replies client
+ the noquota flag and error code.
+\newline 
+
+\layout List
+\labelwidthstring 00.00.0000
+
+OST calls obd_commitrw to commit write.
+\layout List
+\labelwidthstring 00.00.0000
+
+Obdfilter acquiring qunit fails, then performs normal direct write and fails,
+ and then rewrites the pages from client cache, returns error code and noquota
+ flag to OST.
+\layout Subsection
+
+Freeing space to get under quota
+\layout Standard
+
+The release steps are the same as those in 
+\begin_inset LatexCommand \ref{release-quota}
+
+\end_inset 
+
+3.2.2.
+\layout List
+\labelwidthstring 00.00.0000
+
+User issues file write operation.
+\layout List
+\labelwidthstring 00.00.0000
+
+Client makes synchronous write rpc to OSS if there is noquota flag.
+\layout List
+\labelwidthstring 00.00.0000
+
+OSS performs fs write successfully, return client ~noquota flag.
+\layout List
+\labelwidthstring 00.00.0000
+
+Client clears noquota flag for this uid/gid.
+\layout Subsection
+
+Enforcing soft quota
+\layout Subsubsection
+
+Start soft quota timer
+\layout List
+\labelwidthstring 00.00.0000
+
+User issues file write/create operations.
+\layout List
+\labelwidthstring 00.00.0000
+
+System returns successfully.
+\newline 
+
+\layout List
+\labelwidthstring 00.00.0000
+
+Client makes file write/create requests to OSS/MDS.
+\layout List
+\labelwidthstring 00.00.0000
+
+OSS/MDS sends dqacq rpcs to get more quota from master.
+\layout List
+\labelwidthstring 00.00.0000
+
+Master starts the timer once administrative usage >= administrative soft
+ limit and grants qunit to OSS/MDS.
+\layout List
+\labelwidthstring 00.00.0000
+
+OSS/MDS write/create succeeds.
+\layout Subsubsection
+
+Soft quota timer goes off
+\layout List
+\labelwidthstring 00.00.0000
+
+User issues file write/create operations.
+\layout List
+\labelwidthstring 00.00.0000
+
+System returns EDQUOT.
+\newline 
+
+\layout List
+\labelwidthstring 00.00.0000
+
+Client makes file write/create requests to OSS/MDS.
+\layout List
+\labelwidthstring 00.00.0000
+
+OSS/MDS sends dqacq rpcs to get more quota from master.
+\layout List
+\labelwidthstring 00.00.0000
+
+Master returns noquota to OSS/MDS.
+\layout List
+\labelwidthstring 00.00.0000
+
+OSS/MDS write/create fails and returns error code to Client.
+\layout Subsubsection
+
+Stop soft quota timer
+\layout Standard
+
+The release steps are the same as those in 
+\begin_inset LatexCommand \ref{release-quota}
+
+\end_inset 
+
+3.2.2.
+\layout List
+\labelwidthstring 00.00.0000
+
+Slave calls dqrel rpc to release extra quota.
+\layout List
+\labelwidthstring 00.00.0000
+
+Master stops the timer once administrative usage < administrative soft limit.
+\layout Subsection
+
+File quota on the MDS 
+\layout Standard
+
+For CMD, it is similar to block quota described above.
+ For b1_4, it is completely managed by MDS locally.
+\layout Subsection
+
+Listing quota
+\layout List
+\labelwidthstring 00.00.0000
+
+User runs 'lfs quota', it will make an rpc to the corresponding MDS master
+ for the specified uid/gid.
+\layout List
+\labelwidthstring 00.00.0000
+
+System displays usage & limits related to quota for the uid/gid on all nodes
+ in the cluster.
+ if some nodes failed, reports the error to user.
+\layout List
+\labelwidthstring 00.00.0000
+
+User generally can ignore the errors.
+\layout Subsection
+
+Recovery of quota 
+\layout Standard
+
+
+\series bold 
+just describe interaction initiator - response, no internals
+\layout Subsubsection
+
+Slave recovery 
+\layout List
+\labelwidthstring 00.00.0000
+
+Slave releases unreasonably high limits to master.
+\layout List
+\labelwidthstring 00.00.0000
+
+Master updates administrative quota file and reply to slave.
+\layout List
+\labelwidthstring 00.00.0000
+
+Slave updates local operational quota file.
+\layout Subsubsection
+
+Master recovery
+\layout List
+\labelwidthstring 00.00.0000
+
+Master enquires all slaves' operational limits by issuing a new RPC.
+
+\series bold 
+\layout List
+\labelwidthstring 00.00.0000
+
+Slave replies with limit.
+\layout List
+\labelwidthstring 00.00.0000
+
+Master updates administrative quota file.
+\layout Section
+
+State considerations
+\layout Subsection
+
+Node state
+\layout Subsection
+
+Context state
+\layout Section
+
+Logic specification
+\layout Standard
+
+The quota implementation falls into a few, almost separate, components.
+\layout Standard
+
+
+\series bold 
+ORDER OF IMPLEMENTATION
+\layout Enumerate
+
+Administrative utilities, with sufficient flexibility to create unit test
+ cases 
+\layout Enumerate
+
+Administrative quota file implementation
+\layout Enumerate
+
+OSS enforcement of quota (can be tested separately)
+\layout Enumerate
+
+client - OSS protocol
+\layout Enumerate
+
+quota context
+\layout Enumerate
+
+quota acquire release protocol
+\layout Enumerate
+
+MDS-OST setattr calls
+\layout Enumerate
+
+comprehensive testing of use cases
+\layout Enumerate
+
+recovery protocol
+\layout Enumerate
+
+soft limit
+\layout Subsection
+
+Administrative utilities
+\layout Standard
+
+For all of the following commands it is probably useful to define a single
+ datastructure that has enough fields to hold all the data that needs to
+ be transferred.
+\layout Description
+
+Top\SpecialChar ~
+priority
+\layout Enumerate
+
+All utilities are either:
+\begin_deeper 
+\layout Enumerate
+
+file system ioctls - where non-standard Lustre specific info is needed (e.g.
+ listing)
+\layout Enumerate
+
+standard quotactl interfaces
+\end_deeper 
+\layout Enumerate
+
+A lustre obd_iocontrol will allow an MDS to initiate quota check or quotaon
+ operations on all OST's.
+ It should be possible to issue this ioctl as a file system ioctl on a client,
+ or giving an MDS device on an MDS.
+\series bold 
+NOTE: 
+\series default 
+This rpc can be the same as the master to slave recovery enquiry rpc defined
+ below.
+\layout Enumerate
+
+an obd_iocontrol and special lfs is needed to display usage & limits related
+ to quota for a uid/gid on all nodes in the cluster.
+ This needs to be added to lfs and need to be a command that can be issued
+ from a file system client.
+\layout Enumerate
+
+a command is needed to set the limits for a uid/gid, perhaps based on a
+ template.
+ The limits need to be set on the master and in the limit database.
+ All slaves need to be notified that quota tracking for the uid/gid is now
+ in effect (perhaps by increasing quota limits on the node to a non-zero
+ value).
+ Similarly it should be possible to disable quota for a uid / gid.
+\layout Enumerate
+
+Documentation for all of these will be implemented as manual page extensions
+ and as part of the Lustre Users Guide.
+\layout Enumerate
+
+A chown.chgrp utility.
+ Build a small c utility that stats a file and then issues the chown/chgrp
+ system call to change the owner/group on the file.
+ This is issued from a client, in conjunction with running a find command
+ to initialize ownership.
+ This can only be run after the MDS has been changed to incorporate part
+ 3.3
+\layout Subsection
+
+Administrative quota file & disk file system quota
+\layout Enumerate
+
+The administrative quota file will be a quota file similar to ext3 based
+ quota files with the usual VFS determined tree format.
+\layout Enumerate
+
+The VFS quota api will be adapted to enable the administrative commands
+ to create quota files by name and operate on them without sb (super block)
+ or dquot quota context arguments as required.
+\layout Enumerate
+
+
+\series bold 
+(Design this, but implementation is second priority) 
+\series default 
+Quota check will be adapted to handle checking on a live file system, as
+ follows:
+\begin_deeper 
+\layout Enumerate
+
+if inodes are not checked in sequence order (1,2,3, etc) the following is
+ probably not possible.
+\layout Enumerate
+
+block all operations on an inode while it is being 
+\begin_inset Quotes eld
+\end_inset 
+
+checked
+\begin_inset Quotes erd
+\end_inset 
+
+.
+\layout Enumerate
+
+account for quota on inodes that are already checked
+\layout Enumerate
+
+do not account on inodes that are not yet checked
+\end_deeper 
+\layout Subsection
+
+OSS enforcement
+\layout Enumerate
+
+The direct I/O and truncate calls on the OSS will enforce quota
+\layout Subsection
+
+Client OST/MDT protocol
+\layout Standard
+
+The following component can initially be implemented based on quota status
+ codes returned by the disk file system.
+ In due course the status of quota will be determined by the acquire calls
+ made in the OST or obdfilter.
+\layout Enumerate
+
+All writes functions executed on OST's track quota for newly allocated space.
+\layout Enumerate
+
+If a client flushes a page cache to an OST the data will be written (even
+ if quota are exceeded).
+ The mount flags allowing root to squash quota should be used for this.
+\layout Enumerate
+
+If a client exceeds quota, a return code will indicate that
+ further writes for files owned by that uid/gid must now be done synchronously.
+\layout Enumerate
+
+If quota limits on the OSS are sufficient again, through removal of files
+ or enlarging limits, the flag must be cleared.
+\layout Enumerate
+
+For MDC file quota are currently handled synchronously on the server.
+\layout Subsection
+
+Quota context and server quota enforcement
+\layout Enumerate
+
+The MDS will automatically track block quota associated with directories.
+ It is important the llog files are owned by root users and not subject
+ to quota
+\layout Enumerate
+
+For root-owned files, Lustre quota should not be enabled (there are
+ too many administratively controlled root-owned files right now).
+\layout Enumerate
+
+There will be an active 
+\series bold 
+quota context
+\series default 
+ for a uid or gid for which quota operations are in progress.
+ Processes acquiring quota will find the context for that user or group
+ and wait on the context intelligently and not all fire RPC's to the master.
+ The context should also intelligently handle recovery operations running
+ concurrently with normal quota use.
+\layout Subsection
+
+Slave to Master acquire / release protocol
+\layout Enumerate
+
+Tunables
+\begin_deeper 
+\layout Enumerate
+
+All servers will have tunables for qunits and early acquisition of more
+ qunits.
+\layout Enumerate
+
+The tunables can be set to configurable values through lconf, one set of
+ values for slave behavior, one for master behavior each separated for OSS
+ nodes, one for MDS nodes, as part of the configuration zeroconfig llog.
+\layout Enumerate
+
+The tunables can also be adjusted dynamically in /proc.
+\layout Enumerate
+
+Adjusting through proc only is not acceptable.
+\end_deeper 
+\layout Enumerate
+
+There will be a function that determines the master node for a given uid
+ or gid.
+ For the 1.4 branch this function is always returning the MDS, but it will
+ be designed to make it easy to adapt to clustered metadata.
+\layout Enumerate
+
+There will be dqacq and dqrel rpc's initiated by slave nodes.
+ The code will be organized so that it can be run on slave OSS and slave
+ MDS nodes without modification.
+ These functions will increase / decrease the local limits and administrative
+ usage on master.
+\layout Enumerate
+
+A unit test program will run a collection of not less than 3 slaves and
+ a master through a sequence of interesting acquisitions and releases.
+\layout Subsection
+
+Full integration and system testing
+\layout Enumerate
+
+Full unit tests for all components.
+\layout Enumerate
+
+Demonstrate successful handling of recovery from exceeding soft and hard
+ limits.
+\layout Subsection
+
+MDS - OST setattr calls
+\layout Enumerate
+
+When the MDS creates or chown a file it will queue an asynchronous obd_setattr
+ rpc to the RPC that:
+\begin_deeper 
+\layout Enumerate
+
+changes the owner/group of the objects for the file.
+\layout Enumerate
+
+transfers the storage id (ask Yury for data type) to the OSS (this is in
+ the create case only).
+ It writes the storage id in an EA.
+\end_deeper 
+\layout Enumerate
+
+The obd_setattr calls will be journaled almost exactly like mds_unlink calls
+ in an llog (except that for unlink presently the client unlinks the objects)
+ and records will be canceled when the setattr commands commit to disk on
+ the OST.
+\layout Enumerate
+
+The obd_setattr rpc's will be queued on an RPC set for asynchronous completion,
+ i.e.
+ the MDS will reply to the client without waiting for the result.
+ The simple strategy (
+\begin_inset Quotes eld
+\end_inset 
+
+chown, even if user goes over quota
+\begin_inset Quotes erd
+\end_inset 
+
+, see ERS) will be followed.
+\layout Enumerate
+
+For this part not more than 4 (four) lines of code may be added to mds_open.
+ Adding 0 lines to this function (the longest in Lustre) would be better.
+\layout Enumerate
+
+Demonstrate handling recovery of 300,000 orphaned chown operations while
+ the cluster is in use already.
+\layout Subsection
+
+Server Node Recovery
+\layout Standard
+
+Note: in CMD nodes will be slaves for some uids and masters for others.
+ The algorithm outlined here handles the general case.
+\layout Enumerate
+
+Nodes will recover quota asynchronously, i.e.
+ they will start normal operations, without waiting for quota recovery to
+ complete.
+\layout Enumerate
+
+
+\series bold 
+Slave recovery initiation:
+\begin_deeper 
+\layout Enumerate
+
+Slave recovery is initiated on a per-connection basis
+\begin_deeper 
+\layout Enumerate
+
+Upon obtaining a new connection to a server node that can be a master during
+ normal operations
+\layout Enumerate
+
+Upon entering normal operations while connections are present 
+\end_deeper 
+\layout Enumerate
+
+The recovery is aborted if a connection fails.
+\layout Enumerate
+
+A collection of threads is needed to handle this recovery
+\layout Enumerate
+
+The quota file handling should be sufficiently concurrent that multiple
+ connections can recover in parallel
+\end_deeper 
+\layout Enumerate
+
+
+\series bold 
+Slave recovery:
+\series default 
+\begin_deeper 
+\layout Enumerate
+
+During normal use the node will iterate through all the users and groups
+ in the operational quota file.
+\layout Enumerate
+
+If the connection is not one to the master for this uid/gid go to the next
+ uid/gid.
+\layout Enumerate
+
+If such a uid/gid is also found in the node's administrative quota file,
+ this node is the master for that id and this id will be skipped, else continue
+\series bold 
+.
+\layout Enumerate
+
+Release unreasonably high limits for this uid/gid.
+\layout Enumerate
+
+The contexts used for updating quota from the filter should be designed so
+ that these releases can be made concurrent with normal use.
+\end_deeper 
+\layout Enumerate
+
+
+\series bold 
+Master recovery initiation
+\begin_deeper 
+\layout Enumerate
+
+Master recovery requires connections to all other servers, it is initiated:
+\begin_deeper 
+\layout Enumerate
+
+If upon entering normal operations all connections are present
+\layout Enumerate
+
+If during normal operation all connections reach a usable state
+\end_deeper 
+\layout Enumerate
+
+It is aborted if any connection fails during master recovery
+\end_deeper 
+\layout Enumerate
+
+
+\series bold 
+Master recovery:
+\begin_deeper 
+\layout Enumerate
+
+During normal use the master will iterate through the administrative quota
+ file.
+\layout Enumerate
+
+It will lock quota operations on the master for that uid.
+\layout Enumerate
+
+For each uid/gid found it will make 
+\series bold 
+a new quota related master to slave 
+\series default 
+RPC to all other servers and ask for the current limit (and usage).
+\layout Enumerate
+
+If a response is obtained from all nodes, the operational limit on the master
+ node is updated so that the sum of all operational limits is the clusterwide
+ administrative limit.
+\layout Enumerate
+
+If a response is not obtained from all servers, abort.
+\end_deeper 
+\layout Subsection
+
+Soft Limits
+\layout Standard
+
+Soft quota is not enforced in fs layer on master or slave.
+ It's only enforced in obd layer on Master:
+\layout Enumerate
+
+The grace time and soft start time will be kept in administrative file.
+\layout Enumerate
+
+Master monitor the administrative usage on each qunit acquire/release handling:
+ log the soft start time once the administrative usage >= administrative
+ soft limit, clear the soft start time once the administrative usage < administr
+ative soft limit.
+\layout Enumerate
+
+Master will reject any qunit acquire request if soft start time + grace
+ time < current time.
+\layout Standard
+
+Make sure we have unit tests and integration and system tests that verify
+ this comprehensively.
+\layout Section
+
+Changelog
+\layout Description
+
+2005/01/29 First draft.
+ Based on review of Zhaohong's writings and ERS.
+\layout Description
+
+2005/02/06 Second draft, much more detail to aid the team
+\the_end
index de915f8..d6f0cd9 100644 (file)
@@ -724,5 +724,4 @@ int liblustre_wait_event(int timeout);
 #include <linux/lustre_export.h>
 #include <linux/lustre_net.h>
 
-
 #endif
index 4018e67..f0d145f 100644 (file)
@@ -14,4 +14,5 @@ EXTRA_DIST = lprocfs_status.h lustre_debug.h lustre_ha.h lustre_lib.h \
   lustre_net.h obd_class.h obd_ost.h obd_support.h lustre_commit_confd.h \
   lustre_export.h lustre_log.h obd_echo.h \
   lustre_compat25.h lustre_fsfilt.h lustre_import.h lustre_mds.h obd.h \
-  lvfs.h lvfs_linux.h lustre_cfg.h lustre_lite.h  lustre_idl.h
+  lvfs.h lvfs_linux.h lustre_cfg.h lustre_lite.h  lustre_idl.h \
+  lustre_quota.h
index 86774a9..ceb3a41 100644 (file)
@@ -78,6 +78,13 @@ struct fsfilt_operations {
         int     (* fs_read_record)(struct file *, void *, int size, loff_t *);
         int     (* fs_setup)(struct super_block *sb);
         int     (* fs_get_op_len)(int, struct fsfilt_objinfo *, int);
+        int     (* fs_quotacheck)(struct super_block *sb,
+                                  struct obd_quotactl *oqctl);
+        int     (* fs_quotactl)(struct super_block *sb,
+                                struct obd_quotactl *oqctl);
+        int     (* fs_quotainfo)(struct lustre_quota_info *lqi, int type, 
+                                 int cmd);
+        int     (* fs_dquot)(struct lustre_dquot *dquot, int cmd);
 };
 
 extern int fsfilt_register_ops(struct fsfilt_operations *fs_ops);
@@ -290,6 +297,33 @@ static inline int fsfilt_sync(struct obd_device *obd, struct super_block *sb)
         return obd->obd_fsops->fs_sync(sb);
 }
 
+/* Forward a quotacheck request for @sb to the fs_quotacheck method of the
+ * fsfilt operations attached to @obd and return that method's result.
+ * The method pointer is called unconditionally, so the backing fsfilt
+ * implementation must provide it. */
+static inline int fsfilt_quotacheck(struct obd_device *obd,
+                                    struct super_block *sb,
+                                    struct obd_quotactl *oqctl)
+{
+       return obd->obd_fsops->fs_quotacheck(sb, oqctl);
+}
+
+/* Forward the quota control request @oqctl for @sb to the fs_quotactl
+ * method of the fsfilt operations attached to @obd and return that
+ * method's result.  The method pointer is called unconditionally. */
+static inline int fsfilt_quotactl(struct obd_device *obd,
+                                  struct super_block *sb,
+                                  struct obd_quotactl *oqctl)
+{
+       return obd->obd_fsops->fs_quotactl(sb, oqctl);
+}
+
+/* Call the fs_quotainfo method of the fsfilt operations attached to @obd
+ * with (@lqi, @type, @cmd) and return its result.
+ * NOTE(review): @cmd is presumably one of the QFILE_* values and @type a
+ * quota type index into lqi->qi_files[] -- confirm against the fsfilt
+ * implementation. */
+static inline int fsfilt_quotainfo(struct obd_device *obd,
+                                   struct lustre_quota_info *lqi,
+                                   int type, int cmd)
+{
+        return obd->obd_fsops->fs_quotainfo(lqi, type, cmd);
+}
+
+/* Call the fs_dquot method of the fsfilt operations attached to @obd with
+ * (@dquot, @cmd) and return its result.
+ * NOTE(review): @cmd is presumably QFILE_RD_DQUOT or QFILE_WR_DQUOT --
+ * confirm against the fsfilt implementation. */
+static inline int fsfilt_dquot(struct obd_device *obd,
+                               struct lustre_dquot *dquot, int cmd)
+{
+        return obd->obd_fsops->fs_dquot(dquot, cmd);
+}
+
 static inline int fsfilt_map_inode_pages(struct obd_device *obd,
                                          struct inode *inode,
                                          struct page **page, int pages,
index a5d4405..7a7688a 100644 (file)
@@ -234,6 +234,8 @@ typedef enum {
         OST_SAN_WRITE  = 15,
         OST_SYNC       = 16,
         OST_SET_INFO   = 17,
+        OST_QUOTACHECK = 18,
+        OST_QUOTACTL   = 19,
         OST_LAST_OPC
 } ost_cmd_t;
 #define OST_FIRST_OPC  OST_REPLY
@@ -251,9 +253,11 @@ typedef uint32_t        obd_gid;
 typedef uint32_t        obd_flag;
 typedef uint32_t        obd_count;
 
-#define OBD_FL_DELORPHAN    (0x00000004) /* if set in o_flags delete orphans */
-#define OBD_FL_RECREATE_OBJS (0x00000020) // recreate missing obj
-#define OBD_FL_DEBUG_CHECK  (0x00000040) /* echo client/server debug check */
+#define OBD_FL_DELORPHAN     (0x00000004) /* if set in o_flags delete orphans */
+#define OBD_FL_RECREATE_OBJS (0x00000020) /* recreate missing obj */
+#define OBD_FL_DEBUG_CHECK   (0x00000040) /* echo client/server debug check */
+#define OBD_FL_NO_USRQUOTA   (0x00000100) /* the object's owner is over quota */
+#define OBD_FL_NO_GRPQUOTA   (0x00000200) /* the object's group is over quota */
 
 #define OBD_INLINESZ    64
 
@@ -342,9 +346,12 @@ struct lov_mds_md_v1 {            /* LOV EA mds/wire data (little-endian) */
 #define OBD_MD_FLEPOCH  (0x04000000)    /* ->ost write easize is epoch */
 #define OBD_MD_FLGRANT  (0x08000000)    /* ost preallocation space grant */
 #define OBD_MD_FLDIREA  (0x10000000)    /* dir's extended attribute data */
+#define OBD_MD_FLUSRQUOTA  (0x20000000)  
+#define OBD_MD_FLGRPQUOTA  (0x40000000) /* over quota flags sent back by ost */
 #define OBD_MD_FLNOTOBD (~(OBD_MD_FLBLOCKS | OBD_MD_LINKNAME|\
                            OBD_MD_FLEASIZE | OBD_MD_FLHANDLE | OBD_MD_FLCKSUM|\
-                           OBD_MD_FLQOS | OBD_MD_FLOSCOPQ | OBD_MD_FLCOOKIE))
+                           OBD_MD_FLQOS | OBD_MD_FLOSCOPQ | OBD_MD_FLCOOKIE|\
+                           OBD_MD_FLUSRQUOTA | OBD_MD_FLGRPQUOTA))
 
 
 static inline struct lustre_handle *obdo_handle(struct obdo *oa)
@@ -378,14 +385,14 @@ extern void lustre_swab_obd_statfs (struct obd_statfs *os);
 
 /* ost_body.data values for OST_BRW */
 
-#define OBD_BRW_READ       0x01
-#define OBD_BRW_WRITE      0x02
-#define OBD_BRW_RWMASK     (OBD_BRW_READ | OBD_BRW_WRITE)
-#define OBD_BRW_SYNC       0x08
-#define OBD_BRW_CHECK      0x10
-#define OBD_BRW_FROM_GRANT 0x20 /* the osc manages this under llite */
-#define OBD_BRW_GRANTED    0x40 /* the ost manages this */
-#define OBD_BRW_DROP       0x80 /* drop the page after IO */
+#define OBD_BRW_READ            0x01
+#define OBD_BRW_WRITE           0x02
+#define OBD_BRW_RWMASK          (OBD_BRW_READ | OBD_BRW_WRITE)
+#define OBD_BRW_SYNC            0x08
+#define OBD_BRW_CHECK           0x10
+#define OBD_BRW_FROM_GRANT      0x20 /* the osc manages this under llite */
+#define OBD_BRW_GRANTED         0x40 /* the ost manages this */
+#define OBD_BRW_DROP            0x80 /* drop the page after IO */
 
 #define OBD_OBJECT_EOF 0xffffffffffffffffULL
 
@@ -453,6 +460,8 @@ typedef enum {
         MDS_SYNC         = 44,
         MDS_DONE_WRITING = 45,
         MDS_SET_INFO     = 46,
+        MDS_QUOTACHECK   = 47,
+        MDS_QUOTACTL     = 48,
         MDS_LAST_OPC
 } mds_cmd_t;
 
@@ -528,6 +537,49 @@ struct mds_body {
 
 extern void lustre_swab_mds_body (struct mds_body *b);
 
+/* XXX: same as if_dqinfo struct in kernel */
+struct obd_dqinfo {
+        __u64 dqi_bgrace;
+        __u64 dqi_igrace;
+        __u32 dqi_flags;
+        __u32 dqi_valid;
+};
+
+/* XXX: same as if_dqblk struct in kernel, plus one padding */
+struct obd_dqblk {
+        __u64 dqb_bhardlimit;
+        __u64 dqb_bsoftlimit;
+        __u64 dqb_curspace;
+        __u64 dqb_ihardlimit;
+        __u64 dqb_isoftlimit;
+        __u64 dqb_curinodes;
+        __u64 dqb_btime;
+        __u64 dqb_itime;
+        __u32 dqb_valid;
+        __u32 padding;
+};
+
+#define Q_QUOTACHECK    0x800100
+#define Q_INITQUOTA     0x800101        /* init slave limits */
+#define Q_GETOINFO      0x800102        /* get obd quota info */
+#define Q_GETOQUOTA     0x800103        /* get obd quotas */
+
+/* True when the quota type stored in @oqc equals @type or is UGQUOTA.
+ * Both arguments are parenthesized so that an expression passed as @type
+ * cannot re-associate with the == comparison. */
+#define Q_TYPESET(oqc, type) \
+        ((oqc)->qc_type == (type) || (oqc)->qc_type == UGQUOTA)
+
+#define Q_GETOCMD(oqc) \
+        ((oqc)->qc_cmd == Q_GETOINFO || (oqc)->qc_cmd == Q_GETOQUOTA)
+
+/* Quota control data.  It is unpacked from request buffers with
+ * lustre_swab_obd_quotactl(). */
+struct obd_quotactl {
+        __u32                   qc_cmd;         /* command; Q_GETOCMD() tests it */
+        __u32                   qc_type;        /* quota type; Q_TYPESET() tests it */
+        __u32                   qc_id;          /* presumably the uid/gid -- TODO confirm */
+        __u32                   qc_stat;        /* status; stored as quotacheck status */
+        struct obd_dqinfo       qc_dqinfo;
+        struct obd_dqblk        qc_dqblk;
+};
+
+extern void lustre_swab_obd_quotactl(struct obd_quotactl *q);
 
 struct mds_rec_setattr {
         __u32           sa_opcode;
@@ -826,6 +878,7 @@ typedef enum {
 typedef enum {
         OBD_PING = 400,
         OBD_LOG_CANCEL,
+        OBD_QC_CALLBACK,
         OBD_LAST_OPC
 } obd_cmd_t;
 #define OBD_FIRST_OPC OBD_PING
@@ -857,6 +910,7 @@ typedef enum {
         OST_SZ_REC       = LLOG_OP_MAGIC | (OST_SAN_WRITE << 8),
         OST_RAID1_REC    = LLOG_OP_MAGIC | ((OST_SAN_WRITE + 1) << 8),
         MDS_UNLINK_REC   = LLOG_OP_MAGIC | 0x10000 | (MDS_REINT << 8) | REINT_UNLINK,
+        MDS_SETATTR_REC  = LLOG_OP_MAGIC | 0x10000 | (MDS_REINT << 8) | REINT_SETATTR,
         OBD_CFG_REC      = LLOG_OP_MAGIC | 0x20000,
         PTL_CFG_REC      = LLOG_OP_MAGIC | 0x30000,
         LLOG_GEN_REC     = LLOG_OP_MAGIC | 0x40000,
@@ -921,6 +975,16 @@ struct llog_unlink_rec {
         struct llog_rec_tail    lur_tail;
 } __attribute__((packed));
 
+struct llog_setattr_rec {
+        struct llog_rec_hdr     lsr_hdr;
+        obd_id                  lsr_oid;
+        obd_count               lsr_ogen;
+        __u32                   lsr_uid;
+        __u32                   lsr_gid;
+        __u32                   padding;
+        struct llog_rec_tail    lsr_tail;
+} __attribute__((packed));
+
 struct llog_size_change_rec {
         struct llog_rec_hdr     lsc_hdr;
         struct ll_fid           lsc_fid;
@@ -1027,4 +1091,18 @@ static inline struct ll_fid *obdo_fid(struct obdo *oa)
                                  sizeof(struct llog_cookie));
 }
 
+/* quota: unit data handed to dqacq handlers (see dqacq_handler_t) and
+ * byte-swapped by lustre_swab_qdata() */
+struct qunit_data {
+       __u32 qd_id;    /* presumably the uid or gid -- TODO confirm */
+       __u32 qd_type;  /* presumably the quota type -- TODO confirm */
+       __u32 qd_count;
+       __u32 qd_isblk; /* indicating if it's block quota */
+};
+extern void lustre_swab_qdata(struct qunit_data *d);
+
+typedef enum {
+        QUOTA_DQACQ     = 601,
+        QUOTA_DQREL     = 602,
+} quota_cmd_t;
+
 #endif
index 380629a..86e0274 100644 (file)
@@ -83,6 +83,10 @@ int target_handle_reconnect(struct lustre_handle *conn, struct obd_export *exp,
 int target_handle_ping(struct ptlrpc_request *req);
 void target_committed_to_req(struct ptlrpc_request *req);
 
+/* quotacheck callback, dqacq/dqrel callback handler */
+int target_handle_qc_callback(struct ptlrpc_request *req);
+int target_handle_dqacq_callback(struct ptlrpc_request *req);
+
 void target_cancel_recovery_timer(struct obd_device *obd);
 
 #define OBD_RECOVERY_TIMEOUT (obd_timeout * 5 * HZ / 2) /* *waves hands* */
@@ -438,9 +442,13 @@ static inline void obd_ioctl_freedata(char *buf, int len)
 
 #define OBD_IOC_CLOSE_UUID             _IOWR ('f', 147, long)
 
-#define OBD_IOC_LOV_SETSTRIPE            _IOW ('f', 154, long)
-#define OBD_IOC_LOV_GETSTRIPE            _IOW ('f', 155, long)
-#define OBD_IOC_LOV_SETEA                _IOW ('f', 156, long)
+#define OBD_IOC_LOV_SETSTRIPE          _IOW ('f', 154, long)
+#define OBD_IOC_LOV_GETSTRIPE          _IOW ('f', 155, long)
+#define OBD_IOC_LOV_SETEA              _IOW ('f', 156, long)
+
+#define OBD_IOC_QUOTACHECK             _IOW ('f', 160, int)
+#define OBD_IOC_POLL_QUOTACHECK        _IOR ('f', 161, struct if_quotacheck *)
+#define OBD_IOC_QUOTACTL               _IOWR('f', 162, struct if_quotactl *)
 
 #define OBD_IOC_MOUNTOPT               _IOWR('f', 170, long)
 
index 9266f13..9864e5d 100644 (file)
@@ -68,8 +68,14 @@ struct llog_handle {
         } u;
 };
 
+struct llog_fill_rec_data {
+        obd_id          lfd_id;         /* object id */
+        obd_count       lfd_ogen;       /* object group */
+};
+
 /* llog.c  -  general API */
 typedef int (*llog_cb_t)(struct llog_handle *, struct llog_rec_hdr *, void *);
+typedef int (*llog_fill_rec_cb_t)(struct llog_rec_hdr *rec, void *data);
 int llog_init_handle(struct llog_handle *handle, int flags,
                      struct obd_uuid *uuid);
 int llog_process(struct llog_handle *loghandle, llog_cb_t cb,
@@ -106,7 +112,7 @@ int llog_cleanup(struct llog_ctxt *);
 int llog_sync(struct llog_ctxt *ctxt, struct obd_export *exp);
 int llog_add(struct llog_ctxt *ctxt, struct llog_rec_hdr *rec,
              struct lov_stripe_md *lsm, struct llog_cookie *logcookies,
-             int numcookies);
+             int numcookies, llog_fill_rec_cb_t fill_cb);
 int llog_cancel(struct llog_ctxt *, struct lov_stripe_md *lsm,
                 int count, struct llog_cookie *cookies, int flags);
 
@@ -116,7 +122,8 @@ int llog_obd_origin_setup(struct obd_device *obd, int index,
 int llog_obd_origin_cleanup(struct llog_ctxt *ctxt);
 int llog_obd_origin_add(struct llog_ctxt *ctxt,
                         struct llog_rec_hdr *rec, struct lov_stripe_md *lsm,
-                        struct llog_cookie *logcookies, int numcookies);
+                        struct llog_cookie *logcookies, int numcookies,
+                        llog_fill_rec_cb_t fill_cb);
 
 int llog_cat_initialize(struct obd_device *obd, int count);
 int obd_llog_init(struct obd_device *obd, struct obd_device *disk_obd,
@@ -166,7 +173,8 @@ struct llog_operations {
         int (*lop_cleanup)(struct llog_ctxt *ctxt);
         int (*lop_add)(struct llog_ctxt *ctxt, struct llog_rec_hdr *rec,
                        struct lov_stripe_md *lsm,
-                       struct llog_cookie *logcookies, int numcookies);
+                       struct llog_cookie *logcookies, int numcookies,
+                       llog_fill_rec_cb_t fill_cb);
         int (*lop_cancel)(struct llog_ctxt *ctxt, struct lov_stripe_md *lsm,
                           int count, struct llog_cookie *cookies, int flags);
         int (*lop_connect)(struct llog_ctxt *ctxt, int count,
diff --git a/lustre/include/linux/lustre_quota.h b/lustre/include/linux/lustre_quota.h
new file mode 100644 (file)
index 0000000..7c51557
--- /dev/null
@@ -0,0 +1,177 @@
+/* -*- mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*-
+ * vim:expandtab:shiftwidth=8:tabstop=8:
+ */
+#ifndef _LUSTRE_QUOTA_H
+#define _LUSTRE_QUOTA_H
+
+#include <linux/version.h>
+#include <linux/quota.h>
+#include <linux/lustre_idl.h>
+
+/* XXX disable admin quotafile delete dquot temporarily */
+#define QFMT_NO_DELETE 1
+
+/* Quota usage for @count: converted with toqb() when @isblk is non-zero,
+ * otherwise @count unchanged.  Arguments are parenthesized so that
+ * expression arguments cannot re-associate with the ?: operator. */
+#define QUSG(count, isblk)      ((isblk) ? toqb(count) : (count))
+
+/* If the (quota limit < qunit * slave count), the slave which can't
+ * acquire qunit should set it's local limit as MIN_QLIMIT */
+#define MIN_QLIMIT      1
+
+#ifndef NR_DQHASH
+#define NR_DQHASH 45
+#endif
+
+/* structures to access admin quotafile */
+struct lustre_mem_dqinfo {
+        unsigned int dqi_bgrace;
+        unsigned int dqi_igrace;
+        unsigned long dqi_flags;
+        unsigned int dqi_blocks;
+        unsigned int dqi_free_blk;
+        unsigned int dqi_free_entry;
+};
+
+struct lustre_quota_info {
+        struct semaphore qi_sem;
+        struct file *qi_files[MAXQUOTAS];
+        struct lustre_mem_dqinfo qi_info[MAXQUOTAS];
+};
+
+struct lustre_dquot {
+        struct list_head dq_hash;
+        struct list_head dq_unused;
+
+        /* this semaphore is unused until we implement wb dquot cache */
+        struct semaphore dq_sem;
+        atomic_t dq_refcnt;
+
+        struct lustre_quota_info *dq_info;
+        loff_t dq_off;
+        unsigned int dq_id;
+        int dq_type;
+        unsigned long dq_flags;
+        struct mem_dqblk dq_dqb;
+};
+
+#define QFILE_CHK               1
+#define QFILE_RD_INFO           2
+#define QFILE_WR_INFO           3
+#define QFILE_INIT_INFO         4
+#define QFILE_RD_DQUOT          5
+#define QFILE_WR_DQUOT          6
+
+/* admin quotafile operations */
+#if LINUX_VERSION_CODE >= KERNEL_VERSION(2,5,0)
+int lustre_check_quota_file(struct lustre_quota_info *lqi, int type);
+int lustre_read_quota_info(struct lustre_quota_info *lqi, int type);
+int lustre_write_quota_info(struct lustre_quota_info *lqi, int type);
+int lustre_read_dquot(struct lustre_dquot *dquot);
+int lustre_commit_dquot(struct lustre_dquot *dquot);
+int lustre_init_quota_info(struct lustre_quota_info *lqi, int type);
+
+#else
+
+#ifndef DQ_FAKE_B
+#define DQ_FAKE_B       6
+#endif
+
+/* Admin quotafile operations for kernels older than 2.5.0: the real
+ * implementations are only declared for newer kernels, so every operation
+ * here is a stub that ignores its arguments and returns 0. */
+static inline int lustre_check_quota_file(struct lustre_quota_info *lqi,
+                                          int type)
+{
+        return 0;
+}
+static inline int lustre_read_quota_info(struct lustre_quota_info *lqi,
+                                         int type)
+{
+        return 0;
+}
+static inline int lustre_write_quota_info(struct lustre_quota_info *lqi,
+                                          int type)
+{
+        return 0;
+}
+static inline int lustre_read_dquot(struct lustre_dquot *dquot)
+{
+        return 0;
+}
+static inline int lustre_commit_dquot(struct lustre_dquot *dquot)
+{
+        return 0;
+}
+static inline int lustre_init_quota_info(struct lustre_quota_info *lqi,
+                                         int type)
+{
+        return 0;
+}
+#endif                          /* KERNEL_VERSION(2,5,0) */
+
+/* quota context structures */
+struct obd_device;
+typedef int (*dqacq_handler_t) (struct obd_device * obd, struct qunit_data * qd,
+                                int opc);
+
+struct lustre_quota_ctxt {
+        struct super_block *lqc_sb;
+        struct obd_import *lqc_import;
+        dqacq_handler_t lqc_handler;
+        unsigned long lqc_flags;
+        unsigned long lqc_iunit_sz;
+        unsigned long lqc_itune_sz;
+        unsigned long lqc_bunit_sz;
+        unsigned long lqc_btune_sz;
+};
+
+struct lustre_qunit {
+        struct list_head lq_hash;
+        atomic_t lq_refcnt;
+        struct lustre_quota_ctxt *lq_ctxt;
+        struct qunit_data lq_data;
+        unsigned int lq_opc;
+        struct list_head lq_waiters;
+};
+
+#if LINUX_VERSION_CODE >= KERNEL_VERSION(2,5,0)
+int qctxt_init(struct lustre_quota_ctxt *qctxt, struct super_block *sb,
+               dqacq_handler_t handler);
+void qctxt_cleanup(struct lustre_quota_ctxt *qctxt, int force);
+int qctxt_adjust_qunit(struct obd_device *obd, struct lustre_quota_ctxt *qctxt,
+                       uid_t uid, gid_t gid, __u32 isblk);
+int qctxt_wait_on_dqacq(struct obd_device *obd, struct lustre_quota_ctxt *qctxt,
+                        uid_t uid, gid_t gid, __u32 isblk);
+#else
+/* Quota context operations for kernels older than 2.5.0: stubs that ignore
+ * their arguments.  The int-returning ones always return 0 and
+ * qctxt_cleanup() does nothing. */
+static inline int qctxt_init(struct lustre_quota_ctxt *qctxt,
+                             struct super_block *sb, dqacq_handler_t handler)
+{
+        return 0;
+}
+static inline void qctxt_cleanup(struct lustre_quota_ctxt *qctxt, int force)
+{
+        return;
+}
+static inline int qctxt_adjust_qunit(struct obd_device *obd,
+                                     struct lustre_quota_ctxt *qctxt,
+                                     uid_t uid, gid_t gid, __u32 isblk)
+{
+        return 0;
+}
+static inline int qctxt_wait_on_dqacq(struct obd_device *obd,
+                                      struct lustre_quota_ctxt *qctxt,
+                                      uid_t uid, gid_t gid, __u32 isblk)
+{
+        return 0;
+}
+#endif                          /* KERNEL_VERSION(2,5,0) */
+
+/* quota check & quotactl */
+#define LUSTRE_ADMIN_QUOTAFILES {\
+       "admin_quotafile.usr",  /* user admin quotafile */\
+       "admin_quotafile.grp"   /* group admin quotafile */\
+}
+
+struct quotacheck_info {
+        struct completion qi_starting;
+        struct obd_export *qi_exp;
+        struct obd_quotactl qi_oqctl;
+};
+
+#endif                          /* _LUSTRE_QUOTA_H */
index 0ac8a9e..4dca21b 100644 (file)
@@ -30,6 +30,7 @@
 #include <linux/lustre_lib.h>
 #include <linux/lustre_idl.h>
 #include <linux/lustre_export.h>
+#include <linux/lustre_quota.h>
 
 /* this is really local to the OSC */
 struct loi_oap_pages {
@@ -129,6 +130,7 @@ struct obd_async_page_ops {
         int  (*ap_refresh_count)(void *data, int cmd);
         void (*ap_fill_obdo)(void *data, int cmd, struct obdo *oa);
         void (*ap_completion)(void *data, int cmd, struct obdo *oa, int rc);
+        void (*ap_get_ucred)(void *data, struct obd_ucred *ouc);
 };
 
 /* the `oig' is passed down from a caller of obd rw methods.  the callee
@@ -222,6 +224,10 @@ struct filter_obd {
         struct obd_histogram     fo_w_discont_blocks;
         struct obd_histogram     fo_r_disk_iosize;
         struct obd_histogram     fo_w_disk_iosize;
+
+        struct lustre_quota_ctxt fo_quota_ctxt;
+        spinlock_t               fo_quotacheck_lock;
+        atomic_t                 fo_quotachecking;
 };
 
 struct mds_server_data;
@@ -231,6 +237,11 @@ struct mds_server_data;
 #define OSC_MAX_DIRTY_DEFAULT    32
 #define OSC_MAX_DIRTY_MB_MAX    512     /* totally arbitrary */
 
+enum {
+        CL_QUOTACHECKING = 1,
+        CL_NO_QUOTACHECK
+};
+
 struct mdc_rpc_lock;
 struct client_obd {
         struct obd_import       *cl_import;
@@ -282,6 +293,10 @@ struct client_obd {
 
         /* also protected by the poorly named _loi_list_lock lock above */
         struct osc_async_rc      cl_ar;
+
+        /* used by quotacheck */
+        spinlock_t               cl_qchk_lock;
+        int                      cl_qchk_stat; /* quotacheck stat of the peer */
 };
 
 /* Like a client, with some hangers-on.  Keep mc_client_obd first so that we
@@ -330,6 +345,9 @@ struct mds_obd {
         struct file                     *mds_lov_objid_filp;
         unsigned long                   *mds_client_bitmap;
         struct semaphore                 mds_orphan_recovery_sem;
+        struct lustre_quota_info         mds_quota_info;
+        struct lustre_quota_ctxt         mds_quota_ctxt;
+        atomic_t                         mds_quotachecking;
 };
 
 struct echo_obd {
@@ -460,18 +478,18 @@ static inline void oti_free_cookies(struct obd_trans_info *oti)
 
 /* llog contexts */
 enum llog_ctxt_id {
-        LLOG_CONFIG_ORIG_CTXT =  0,
-        LLOG_CONFIG_REPL_CTXT =  1,
-        LLOG_UNLINK_ORIG_CTXT =  2,
-        LLOG_UNLINK_REPL_CTXT =  3,
-        LLOG_SIZE_ORIG_CTXT   =  4,
-        LLOG_SIZE_REPL_CTXT   =  5,
-        LLOG_MD_ORIG_CTXT     =  6,
-        LLOG_MD_REPL_CTXT     =  7,
-        LLOG_RD1_ORIG_CTXT    =  8,
-        LLOG_RD1_REPL_CTXT    =  9,
-        LLOG_TEST_ORIG_CTXT   = 10,
-        LLOG_TEST_REPL_CTXT   = 11,
+        LLOG_CONFIG_ORIG_CTXT  =  0,
+        LLOG_CONFIG_REPL_CTXT  =  1,
+        LLOG_MDS_OST_ORIG_CTXT =  2,
+        LLOG_MDS_OST_REPL_CTXT =  3,
+        LLOG_SIZE_ORIG_CTXT    =  4,
+        LLOG_SIZE_REPL_CTXT    =  5,
+        LLOG_MD_ORIG_CTXT      =  6,
+        LLOG_MD_REPL_CTXT      =  7,
+        LLOG_RD1_ORIG_CTXT     =  8,
+        LLOG_RD1_REPL_CTXT     =  9,
+        LLOG_TEST_ORIG_CTXT    = 10,
+        LLOG_TEST_REPL_CTXT    = 11,
         LLOG_MAX_CTXTS
 };
 
@@ -589,6 +607,8 @@ struct obd_ops {
                          struct lov_stripe_md *ea, struct obd_trans_info *oti);
         int (*o_setattr)(struct obd_export *exp, struct obdo *oa,
                          struct lov_stripe_md *ea, struct obd_trans_info *oti);
+        int (*o_setattr_async)(struct obd_export *exp, struct obdo *oa,
+                         struct lov_stripe_md *ea, struct obd_trans_info *oti);
         int (*o_getattr)(struct obd_export *exp, struct obdo *oa,
                          struct lov_stripe_md *ea);
         int (*o_getattr_async)(struct obd_export *exp, struct obdo *oa,
@@ -691,6 +711,11 @@ struct obd_ops {
 
         int (*o_notify)(struct obd_device *obd, struct obd_device *watched,
                         int active);
+
+        /* quota methods */
+        int (*o_quotacheck)(struct obd_export *, struct obd_quotactl *);
+        int (*o_quotactl)(struct obd_export *, struct obd_quotactl *);
+
         /* 
          * NOTE: If adding ops, add another LPROCFS_OBD_OP_INIT() line
          * to lprocfs_alloc_obd_stats() in obdclass/lprocfs_status.c.
index 960fbe5..e7d1afd 100644 (file)
@@ -67,6 +67,8 @@ struct obd_device *class_uuid2obd(struct obd_uuid *uuid);
 struct obd_device * class_find_client_obd(struct obd_uuid *tgt_uuid, 
                                           char * typ_name,
                                           struct obd_uuid *grp_uuid);
+struct obd_device * class_find_client_notype(struct obd_uuid *tgt_uuid,
+                                             struct obd_uuid *grp_uuid);
 struct obd_device * class_devices_in_group(struct obd_uuid *grp_uuid, 
                                            int *next);
 
@@ -467,6 +469,21 @@ static inline int obd_setattr(struct obd_export *exp, struct obdo *obdo,
         RETURN(rc);
 }
 
+static inline int obd_setattr_async(struct obd_export *exp, 
+                                    struct obdo *obdo,
+                                    struct lov_stripe_md *ea,
+                                    struct obd_trans_info *oti)
+{
+        int rc;
+        ENTRY;
+                                                                                                                             
+        EXP_CHECK_OP(exp, setattr_async);
+        OBD_COUNTER_INCREMENT(exp->exp_obd, setattr_async);
+                                                                                                                             
+        rc = OBP(exp->exp_obd, setattr_async)(exp, obdo, ea, oti);
+        RETURN(rc);
+}
+
 static inline int obd_add_conn(struct obd_import *imp, struct obd_uuid *uuid,
                                int priority)
 {
@@ -983,6 +1000,33 @@ static inline int obd_notify(struct obd_device *obd,
         return OBP(obd, notify)(obd, watched, active);
 }
 
+/* Invoke the quotacheck method of the obd device behind @exp with
+ * (@exp, @oqctl) and return its result.  The call is preceded by
+ * EXP_CHECK_OP() and a per-operation counter increment.
+ * NOTE(review): EXP_CHECK_OP() is defined elsewhere and presumably returns
+ * early when the method is not provided -- confirm. */
+static inline int obd_quotacheck(struct obd_export *exp,
+                                 struct obd_quotactl *oqctl)
+{
+        int rc;
+        ENTRY;
+
+        EXP_CHECK_OP(exp, quotacheck);
+        OBD_COUNTER_INCREMENT(exp->exp_obd, quotacheck);
+
+        rc = OBP(exp->exp_obd, quotacheck)(exp, oqctl);
+        RETURN(rc);
+} 
+
+/* Invoke the quotactl method of the obd device behind @exp with
+ * (@exp, @oqctl) and return its result.  The call is preceded by
+ * EXP_CHECK_OP() and a per-operation counter increment.
+ * NOTE(review): EXP_CHECK_OP() is defined elsewhere and presumably returns
+ * early when the method is not provided -- confirm. */
+static inline int obd_quotactl(struct obd_export *exp,
+                               struct obd_quotactl *oqctl)
+{
+        int rc;
+        ENTRY;
+
+        EXP_CHECK_OP(exp, quotactl);
+        OBD_COUNTER_INCREMENT(exp->exp_obd, quotactl);
+
+        rc = OBP(exp->exp_obd, quotactl)(exp, oqctl);
+        RETURN(rc);
+} 
+
+
 static inline int obd_register_observer(struct obd_device *obd,
                                         struct obd_device *observer)
 {
@@ -1010,6 +1054,11 @@ static inline struct obdo *obdo_alloc(void)
         return oa;
 }
 
+/* qunit hash stuff */
+extern kmem_cache_t *qunit_cachep;
+extern struct list_head qunit_hash[];
+extern spinlock_t qunit_hash_lock;
+
 static inline void obdo_free(struct obdo *oa)
 {
         OBD_SLAB_FREE(oa, obdo_cachep, sizeof(*oa));
@@ -1042,4 +1091,5 @@ int class_add_uuid(char *uuid, __u64 nid, __u32 nal);
 int class_del_uuid (char *uuid);
 void class_init_uuidlist(void);
 void class_exit_uuidlist(void);
+
 #endif /* __LINUX_OBD_CLASS_H */
index 4207a5e..2c54309 100644 (file)
@@ -87,6 +87,9 @@ extern wait_queue_head_t obd_race_waitq;
 #define OBD_FAIL_MDS_PAUSE_OPEN          0x129
 #define OBD_FAIL_MDS_STATFS_LCW_SLEEP    0x12a
 #define OBD_FAIL_MDS_OPEN_CREATE         0x12b
+#define OBD_FAIL_MDS_OST_SETATTR         0x12c
+#define OBD_FAIL_MDS_QUOTACHECK_NET      0x12d
+#define OBD_FAIL_MDS_QUOTACTL_NET        0x12e
 
 #define OBD_FAIL_OST                     0x200
 #define OBD_FAIL_OST_CONNECT_NET         0x201
@@ -112,6 +115,8 @@ extern wait_queue_head_t obd_race_waitq;
 #define OBD_FAIL_OST_ENOSPC              0x215
 #define OBD_FAIL_OST_EROFS               0x216
 #define OBD_FAIL_OST_ENOENT              0x217
+#define OBD_FAIL_OST_QUOTACHECK_NET      0x218
+#define OBD_FAIL_OST_QUOTACTL_NET        0x219
 
 #define OBD_FAIL_LDLM                    0x300
 #define OBD_FAIL_LDLM_NAMESPACE_NEW      0x301
@@ -145,6 +150,7 @@ extern wait_queue_head_t obd_race_waitq;
 #define OBD_FAIL_OBD_PING_NET            0x600
 #define OBD_FAIL_OBD_LOG_CANCEL_NET      0x601
 #define OBD_FAIL_OBD_LOGD_NET            0x602
+#define OBD_FAIL_OBD_QC_CALLBACK_NET     0x603
 
 #define OBD_FAIL_TGT_REPLY_NET           0x700
 #define OBD_FAIL_TGT_CONN_RACE           0x701
index ab74c1d..6527f40 100644 (file)
@@ -19,5 +19,8 @@ extern int llapi_target_check(int num_types, char **obd_types, char *dir);
 extern int llapi_catinfo(char *dir, char *keyword, char *node_name);
 extern int llapi_lov_get_uuids(int fd, struct obd_uuid *uuidp, int *ost_count);
 extern int llapi_is_lustre_mnttype(char *type);
-
+extern int llapi_quotacheck(char *mnt, int check_type);
+extern int llapi_poll_quotacheck(char *mnt, struct if_quotacheck *qchk);
+extern int llapi_quotactl(char *mnt, struct if_quotactl *qctl);
+extern int llapi_quotachog(char *path, int flag);
 #endif
index 9de6b72..6d1faa3 100644 (file)
@@ -9,6 +9,7 @@
 #ifndef _LUSTRE_USER_H
 #define _LUSTRE_USER_H
 #include <asm/types.h>
+#include <linux/quota.h>
 #ifdef __KERNEL__
 #include <linux/string.h>
 #else
@@ -37,6 +38,9 @@
 #define LL_IOC_RECREATE_OBJ             _IOW ('f', 157, long)
 #define LL_IOC_GROUP_LOCK               _IOW ('f', 158, long)
 #define LL_IOC_GROUP_UNLOCK             _IOW ('f', 159, long)
+#define LL_IOC_QUOTACHECK               _IOW ('f', 160, int)
+#define LL_IOC_POLL_QUOTACHECK          _IOR ('f', 161, struct if_quotacheck *)
+#define LL_IOC_QUOTACTL                 _IOWR('f', 162, struct if_quotactl *)
 
 #define IOC_MDC_TYPE            'i'
 #define IOC_MDC_GETSTRIPE       _IOWR(IOC_MDC_TYPE, 21, struct lov_mds_md *)
@@ -118,4 +122,47 @@ static inline void obd_str2uuid(struct obd_uuid *uuid, char *tmp)
         uuid->uuid[sizeof(*uuid) - 1] = '\0';
 }
 
+#define UGQUOTA 2       /* set both USRQUOTA and GRPQUOTA */
+
+#define QFMT_LDISKFS 2  /* QFMT_VFS_V0(2), quota format for ldiskfs */
+
+struct if_quotacheck {
+        char                    obd_type[10];
+        struct obd_uuid         obd_uuid;
+        int                     stat;
+};
+
+#ifndef __KERNEL__
+/* XXX: these two structs should be in /usr/include/linux/quota.h */
+struct if_dqinfo {
+        __u64 dqi_bgrace;
+        __u64 dqi_igrace;
+        __u32 dqi_flags;
+        __u32 dqi_valid;
+};
+
+struct if_dqblk {
+        __u64 dqb_bhardlimit;
+        __u64 dqb_bsoftlimit;
+        __u64 dqb_curspace;
+        __u64 dqb_ihardlimit;
+        __u64 dqb_isoftlimit;
+        __u64 dqb_curinodes;
+        __u64 dqb_btime;
+        __u64 dqb_itime;
+        __u32 dqb_valid;
+};
+#endif
+
+struct if_quotactl {
+        int                     qc_cmd;
+        int                     qc_type;
+        int                     qc_id;
+        int                     qc_stat;
+        struct if_dqinfo        qc_dqinfo;
+        struct if_dqblk         qc_dqblk;
+        char                    obd_type[10];
+        struct obd_uuid         obd_uuid;
+};
+
 #endif /* _LUSTRE_USER_H */
index 8f7188a..6ec26cd 100644 (file)
@@ -252,7 +252,7 @@ Index: linux-bgl/kernel/kallsyms.c
 +   Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
 +  */
 +
-+#ident "$Id: kallsyms-2.4-bgl.patch,v 1.1.4.1 2004/10/29 00:51:21 jacob Exp $"
++#ident "$Id: kallsyms-2.4-bgl.patch,v 1.1.20.1 2005/03/24 22:50:28 jacob Exp $"
 +
 +/*
 +   This code uses the list of all kernel and module symbols to :-
@@ -568,7 +568,7 @@ Index: linux-bgl/include/linux/kallsyms.h
 +   Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
 + */
 +
-+#ident "$Id: kallsyms-2.4-bgl.patch,v 1.1.4.1 2004/10/29 00:51:21 jacob Exp $"
++#ident "$Id: kallsyms-2.4-bgl.patch,v 1.1.20.1 2005/03/24 22:50:28 jacob Exp $"
 +
 +#ifndef MODUTILS_KALLSYMS_H
 +#define MODUTILS_KALLSYMS_H 1
index e2aff01..be51da2 100644 (file)
@@ -1,6 +1,6 @@
 default: all
 
-MODULES := ldiskfs
+MODULES := ldiskfs quotafmt_test
 
 # copy makefile over to not break patches
 ext3_extra := $(wildcard @LINUX@/fs/ext3/Makefile)
@@ -11,9 +11,13 @@ linux_headers := $(wildcard @LINUX@/include/linux/ext3*.h)
 ext3_sources := $(filter-out %.mod.c,$(wildcard @LINUX@/fs/ext3/*.c))
 new_sources := iopen.c iopen.h extents.c mballoc.c
 new_headers := ext3_extents.h
-ldiskfs_sources := $(notdir $(ext3_sources) $(ext3_headers)) $(new_sources) $(new_headers)
+# Quota format files; they are appended to ldiskfs_sources below and so end
+# up in ldiskfs-objs.
+quotafmt_sources := lustre_quota_fmt.c
+quotafmt_headers := lustre_quota_fmt.h
+# Basenames of the ext3 sources/headers plus the new ldiskfs files.  Kept as
+# its own list so it can be referenced without the quota format files.
+ldiskfs_patched_sources := $(notdir $(ext3_sources) $(ext3_headers)) $(new_sources) $(new_headers)
+# Full ldiskfs source list: the patched sources plus the quota format files.
+ldiskfs_sources := $(ldiskfs_patched_sources) $(quotafmt_sources) $(quotafmt_headers)
 
 ldiskfs-objs := $(filter %.o,$(ldiskfs_sources:.c=.o))
+quotafmt-objs := quotafmt_test.o
 
 EXTRA_PRE_CFLAGS := -I@LINUX@/fs -I@LUSTRE@ -I@LUSTRE@/ldiskfs
 
index a31e01a..4f9e784 100644 (file)
@@ -6,7 +6,7 @@ endif
 
 ldiskfs_linux_headers := $(addprefix linux/,$(subst ext3,ldiskfs,$(notdir $(linux_headers))))
 
-$(filter %.c,$(ldiskfs_sources)): sources $(ldiskfs_linux_headers) $(filter %.h,$(ldiskfs_sources))
+$(filter %.c,$(ldiskfs_patched_sources)): sources $(ldiskfs_linux_headers) $(filter %.h,$(ldiskfs_patched_sources))
 
 ldiskfs_sed_flags = \
        -e "s/dx_hash_info/ext3_dx_hash_info/g" \
@@ -71,7 +71,9 @@ foo-check:
        @echo "ldiskfs_LDADD: $(ldiskfs_LDADD)"
 
 MOSTLYCLEANFILES = *.o *.ko *.mod.c
-CLEANFILES = sources *.c *.h
+CLEANFILES = sources $(notdir $(linux_headers) $(ext3_headers) $(ext3_sources) $(new_sources) $(new_headers))
+
+EXTRA_DIST := lustre_quota_fmt.c lustre_quota_fmt.h quotafmt_test.c
 
 clean: clean-am
        rm -rf linux linux-stage
index 0d7cc6f..026e92f 100644 (file)
@@ -338,6 +338,9 @@ int client_obd_setup(struct obd_device *obddev, obd_count len, void *buf)
                         cli->cl_mgmtcli_obd = mgmt_obd;
         }
 
+        spin_lock_init(&cli->cl_qchk_lock);
+        cli->cl_qchk_stat = CL_NO_QUOTACHECK;
+
         RETURN(rc);
 
 err_import:
@@ -1352,4 +1355,61 @@ void target_committed_to_req(struct ptlrpc_request *req)
         CDEBUG(D_INFO, "last_committed "LPU64", xid "LPU64"\n",
                obd->obd_last_committed, req->rq_xid);
 }
+
+/*
+ * Handle an OBD_QC_CALLBACK request: take the quotacheck status carried in
+ * the request's obd_quotactl buffer and store it in the client obd's
+ * cl_qchk_stat, under cl_qchk_lock.
+ *
+ * Returns 0 on success, or -EPROTO if the request buffer cannot be unpacked.
+ */
+int target_handle_qc_callback(struct ptlrpc_request *req)
+{
+        struct obd_quotactl *oqctl;
+        struct client_obd *cli = &req->rq_export->exp_obd->u.cli;
+
+        oqctl = lustre_swab_reqbuf(req, 0, sizeof(*oqctl),
+                                   lustre_swab_obd_quotactl);
+        /* the unpack can fail and hand back NULL; never dereference that */
+        if (oqctl == NULL) {
+                CERROR("unpacking request buffer failed!\n");
+                return -EPROTO;
+        }
+
+        spin_lock(&cli->cl_qchk_lock);
+        cli->cl_qchk_stat = oqctl->qc_stat;
+        spin_unlock(&cli->cl_qchk_lock);
+
+        return 0;
+}
+
+/*
+ * Handle a quota acquire/release request.
+ *
+ * Packs a one-buffer reply sized for a qunit_data, unpacks the request's
+ * qunit_data, passes it to the lqc_handler of the MDS found two steps up the
+ * obd's observer chain, copies the (possibly updated) qunit_data into the
+ * reply and sends the reply from here.
+ *
+ * Returns the lustre_pack_reply() error, or -EPROTO when the request buffer
+ * cannot be unpacked (no reply is sent on either of those paths); otherwise
+ * the result of ptlrpc_reply().  The handler's own status travels to the
+ * peer in rq_status.
+ */
+int target_handle_dqacq_callback(struct ptlrpc_request *req)
+{
+        struct obd_device *obd = req->rq_export->exp_obd;
+        struct obd_device *master_obd;
+        struct lustre_quota_ctxt *qctxt;
+        struct qunit_data *qdata, *rep;
+        int rc = 0, repsize = sizeof(struct qunit_data);
+        ENTRY;
+
+        rc = lustre_pack_reply(req, 1, &repsize, NULL);
+        if (rc) {
+                CERROR("packing reply failed!: rc = %d\n", rc);
+                RETURN(rc);
+        }
+        rep = lustre_msg_buf(req->rq_repmsg, 0, sizeof(*rep));
+        LASSERT(rep);
+
+        qdata = lustre_swab_reqbuf(req, 0, sizeof(*qdata), lustre_swab_qdata);
+        if (qdata == NULL) {
+                CERROR("unpacking request buffer failed!\n");
+                RETURN(-EPROTO);
+        }
+
+        /* we use the observer */
+        LASSERT(obd->obd_observer && obd->obd_observer->obd_observer);
+        master_obd = obd->obd_observer->obd_observer;
+        qctxt = &master_obd->u.mds.mds_quota_ctxt;
+
+        LASSERT(qctxt->lqc_handler);
+        rc = qctxt->lqc_handler(master_obd, qdata, req->rq_reqmsg->opc);
+        /* -EDQUOT is an expected outcome and is not logged as an error */
+        if (rc && rc != -EDQUOT)
+                CERROR("dqacq failed! (rc:%d)\n", rc);
+
+        /* the qd_count might be changed in lqc_handler */
+        memcpy(rep, qdata, sizeof(*rep));
+        req->rq_status = rc;
+        rc = ptlrpc_reply(req);
+
+        RETURN(rc);
+}
 EXPORT_SYMBOL(target_committed_to_req);
index f74881b..450e2cf 100644 (file)
@@ -1142,6 +1142,16 @@ static int ldlm_callback_handler(struct ptlrpc_request *req)
                 rc = llog_origin_handle_cancel(req);
                 ldlm_callback_reply(req, rc);
                 RETURN(0);
+        case OBD_QC_CALLBACK:
+                OBD_FAIL_RETURN(OBD_FAIL_OBD_QC_CALLBACK_NET, 0);
+                rc = target_handle_qc_callback(req);
+                ldlm_callback_reply(req, rc);
+                RETURN(0);
+        case QUOTA_DQACQ:
+        case QUOTA_DQREL:
+                /* reply in handler */
+                rc = target_handle_dqacq_callback(req);
+                RETURN(0);
         case LLOG_ORIGIN_HANDLE_CREATE:
                 OBD_FAIL_RETURN(OBD_FAIL_OBD_LOGD_NET, 0);
                 rc = llog_origin_handle_create(req);
index 43f884e..63e09bc 100644 (file)
@@ -367,6 +367,27 @@ done:
         RETURN(rc);
 }
 
+#define Q_CONV(tgt, src, member) (tgt)->member = (src)->member
+
+#define QCTLCONV(tgt, src)                             \
+do {                                                   \
+        Q_CONV(tgt, src, qc_cmd);                      \
+        Q_CONV(tgt, src, qc_type);                     \
+        Q_CONV(tgt, src, qc_id);                       \
+        Q_CONV(tgt, src, qc_stat);                     \
+        Q_CONV(tgt, src, qc_dqinfo.dqi_bgrace);        \
+        Q_CONV(tgt, src, qc_dqinfo.dqi_igrace);        \
+        Q_CONV(tgt, src, qc_dqinfo.dqi_flags);         \
+        Q_CONV(tgt, src, qc_dqblk.dqb_ihardlimit);     \
+        Q_CONV(tgt, src, qc_dqblk.dqb_isoftlimit);     \
+        Q_CONV(tgt, src, qc_dqblk.dqb_curinodes);      \
+        Q_CONV(tgt, src, qc_dqblk.dqb_bhardlimit);     \
+        Q_CONV(tgt, src, qc_dqblk.dqb_bsoftlimit);     \
+        Q_CONV(tgt, src, qc_dqblk.dqb_curspace);       \
+        Q_CONV(tgt, src, qc_dqblk.dqb_btime);          \
+        Q_CONV(tgt, src, qc_dqblk.dqb_itime);          \
+} while (0)
+
 static int ll_dir_ioctl(struct inode *inode, struct file *file,
                         unsigned int cmd, unsigned long arg)
 {
@@ -654,6 +675,171 @@ static int ll_dir_ioctl(struct inode *inode, struct file *file,
                 obd_ioctl_freedata(buf, len);
                 RETURN(rc);
         }
+        case OBD_IOC_QUOTACHECK: {
+                struct obd_quotactl oqctl = { 0, };
+                int rc, error = 0;
+
+                if (!capable(CAP_SYS_ADMIN))
+                        RETURN(-EPERM);
+
+                oqctl.qc_type = arg;
+                rc = obd_quotacheck(sbi->ll_mdc_exp, &oqctl);
+                if (rc < 0) {
+                        CDEBUG(D_INFO, "mdc_quotacheck failed: rc %d\n", rc);
+                        error = rc;
+                }
+
+                rc = obd_quotacheck(sbi->ll_osc_exp, &oqctl);
+                if (rc < 0)
+                        CDEBUG(D_INFO, "osc_quotacheck failed: rc %d\n", rc);
+
+                if (error)
+                        rc = error;
+                return rc;
+        }
+        case OBD_IOC_POLL_QUOTACHECK: {
+                /* Poll quotacheck status on the MDC export first, then on
+                 * the OSC export.  On the first failure the status block is
+                 * copied out to user space and the error is returned; when
+                 * both succeed nothing is copied out and 0 is returned. */
+                struct if_quotacheck check;
+                int rc;
+
+                if (!capable(CAP_SYS_ADMIN))
+                        RETURN(-EPERM);
+
+                /* Zero the whole block: check.stat is read and the struct is
+                 * copied to user space on failure even if the callee never
+                 * filled it in, which would otherwise expose kernel stack. */
+                memset(&check, 0, sizeof(check));
+
+                rc = obd_iocontrol(cmd, sbi->ll_mdc_exp, 0, (void *)&check,
+                                   NULL);
+                if (check.stat == -ENODATA)
+                        rc = check.stat;
+                if (rc) {
+                        CDEBUG(D_QUOTA, "mdc ioctl %d failed: rc %d\n",
+                               cmd, rc);
+                        if (copy_to_user((void *)arg, &check, sizeof(check)))
+                                RETURN(-EFAULT);
+                        RETURN(rc);
+                }
+
+                rc = obd_iocontrol(cmd, sbi->ll_osc_exp, 0, (void *)&check,
+                                   NULL);
+                if (check.stat == -ENODATA)
+                        rc = check.stat;
+                if (rc) {
+                        CDEBUG(D_QUOTA, "osc ioctl %d failed: rc %d\n",
+                               cmd, rc);
+                        if (copy_to_user((void *)arg, &check, sizeof(check)))
+                                RETURN(-EFAULT);
+                        RETURN(rc);
+                }
+
+                RETURN(0);
+        }
+        case OBD_IOC_QUOTACTL: {
+                /* Generic quotactl entry point: validate the command and the
+                 * caller's privileges, convert the user's if_quotactl into
+                 * an obd_quotactl, dispatch it, and copy the result back. */
+                struct if_quotactl qctl;
+                /* zero-initialised: QCTLCONV fills only some members and the
+                 * rest must not go out as uninitialised stack contents */
+                struct obd_quotactl oqctl = { 0, };
+
+                int cmd, type, id, rc = 0, error = 0;
+
+                if (copy_from_user(&qctl, (void *)arg, sizeof(qctl)))
+                        RETURN(-EFAULT);
+
+                cmd = qctl.qc_cmd;
+                type = qctl.qc_type;
+                id = qctl.qc_id;
+                switch (cmd) {
+                case Q_QUOTAON:
+                case Q_QUOTAOFF:
+                case Q_SETQUOTA:
+                case Q_SETINFO:
+                        if (!capable(CAP_SYS_ADMIN))
+                                RETURN(-EPERM);
+                        break;
+                case Q_GETQUOTA:
+                        /* unprivileged callers may only query their own
+                         * uid or a group they belong to */
+                        if (((type == USRQUOTA && current->euid != id) ||
+                             (type == GRPQUOTA && !in_egroup_p(id))) &&
+                            !capable(CAP_SYS_ADMIN))
+                                RETURN(-EPERM);
+                        break;
+                case Q_GETINFO:
+                        break;
+                default:
+                        RETURN(-EINVAL);
+                }
+
+                QCTLCONV(&oqctl, &qctl);
+
+                /* a non-empty uuid addresses one specific target; only the
+                 * two "get" commands are allowed in that form */
+                if (qctl.obd_uuid.uuid[0]) {
+                        struct obd_device *obd;
+                        struct obd_uuid *uuid = &qctl.obd_uuid;
+
+                        if (cmd == Q_GETINFO)
+                                oqctl.qc_cmd = Q_GETOINFO;
+                        else if (cmd == Q_GETQUOTA)
+                                oqctl.qc_cmd = Q_GETOQUOTA;
+                        else
+                                RETURN(-EINVAL);
+
+                        rc = -ENOENT;
+                        obd = class_find_client_notype(uuid,
+                                         &sbi->ll_osc_exp->exp_obd->obd_uuid);
+                        if (!obd)
+                                RETURN(rc);
+
+                        if (sbi->ll_mdc_exp->exp_obd == obd) {
+                                rc = obd_quotactl(sbi->ll_mdc_exp, &oqctl);
+                        } else {
+                                int i;
+                                struct obd_export *exp;
+                                struct lov_obd *lov = &sbi->ll_osc_exp->
+                                                            exp_obd->u.lov;
+
+                                /* rc stays -ENOENT if no active target
+                                 * matches the requested obd */
+                                for (i = 0; i < lov->desc.ld_tgt_count; i++) {
+                                        exp = lov->tgts[i].ltd_exp;
+
+                                        if (!lov->tgts[i].active)
+                                                continue;
+
+                                        if (exp->exp_obd == obd) {
+                                                rc = obd_quotactl(exp, &oqctl);
+                                                break;
+                                        }
+                                }
+                        }
+
+                        QCTLCONV(&qctl, &oqctl);
+
+                        if (copy_to_user((void *)arg, &qctl, sizeof(qctl)))
+                                RETURN(-EFAULT);
+
+                        RETURN(rc);
+                }
+
+                if (cmd == Q_SETQUOTA)
+                        oqctl.qc_dqblk.dqb_valid = QIF_LIMITS;
+
+                rc = obd_quotactl(sbi->ll_mdc_exp, &oqctl);
+                if (rc) {
+                        /* -EBUSY from quota-on is remembered but does not
+                         * stop the OSC side from being switched on too */
+                        if (rc == -EBUSY && cmd == Q_QUOTAON)
+                                error = rc;
+                        else
+                                RETURN(rc);
+                }
+
+                if (cmd == Q_QUOTAON || cmd == Q_QUOTAOFF) {
+                        rc = obd_quotactl(sbi->ll_osc_exp, &oqctl);
+                        if (rc) {
+                                /* quota-on failed on the OSC side: switch
+                                 * quota back off on both exports */
+                                if (rc != -EBUSY && cmd == Q_QUOTAON) {
+                                        oqctl.qc_cmd = Q_QUOTAOFF;
+                                        obd_quotactl(sbi->ll_mdc_exp, &oqctl);
+                                        obd_quotactl(sbi->ll_osc_exp, &oqctl);
+                                }
+                                RETURN(rc);
+                        }
+                }
+
+                QCTLCONV(&qctl, &oqctl);
+
+                if (copy_to_user((void *)arg, &qctl, sizeof(qctl)))
+                        RETURN(-EFAULT);
+
+                RETURN(rc?:error);
+        }
         default:
                 return obd_iocontrol(cmd, sbi->ll_osc_exp,0,NULL,(void *)arg);
         }
index 50c6d11..4aa8183 100644 (file)
@@ -215,6 +215,8 @@ struct ll_async_page {
                          llap_origin:3,
                          llap_ra_used:1;
         struct list_head llap_pglist_item;
+        /* user credit information for oss enforcement quota */
+        struct obd_ucred llap_ouc;
 };
 
 enum {
index e6d8a2a..0b008a5 100644 (file)
@@ -347,7 +347,8 @@ void ll_inode_fill_obdo(struct inode *inode, int cmd, struct obdo *oa)
                 mdc_pack_fid(obdo_fid(oa), inode->i_ino, 0, inode->i_mode);
                 oa->o_easize = ll_i2info(inode)->lli_io_epoch;
 
-                valid_flags |= OBD_MD_FLMTIME | OBD_MD_FLCTIME;
+                valid_flags |= OBD_MD_FLMTIME | OBD_MD_FLCTIME |
+                               OBD_MD_FLUID | OBD_MD_FLGID;
         }
 
         obdo_from_inode(oa, inode, valid_flags);
@@ -368,11 +369,26 @@ static void ll_ap_fill_obdo(void *data, int cmd, struct obdo *oa)
         EXIT;
 }
 
+/*
+ * Async-page callback: copy the user credentials stored on the page's
+ * ll_async_page (llap_ouc) into @ouc.  If the cookie does not resolve to a
+ * valid llap, @ouc is left untouched.
+ */
+static void ll_ap_get_ucred(void *data, struct obd_ucred *ouc)
+{
+        struct ll_async_page *llap;
+        ENTRY;
+
+        llap = llap_from_cookie(data);
+        if (IS_ERR(llap)) {
+                EXIT;
+                return;
+        }
+
+        memcpy(ouc, &llap->llap_ouc, sizeof(*ouc));
+        EXIT;
+}
+
 static struct obd_async_page_ops ll_async_page_ops = {
         .ap_make_ready =        ll_ap_make_ready,
         .ap_refresh_count =     ll_ap_refresh_count,
         .ap_fill_obdo =         ll_ap_fill_obdo,
         .ap_completion =        ll_ap_completion,
+        .ap_get_ucred =         ll_ap_get_ucred,
 };
 
 struct ll_async_page *llap_cast_private(struct page *page)
@@ -520,6 +536,7 @@ struct ll_async_page *llap_from_page(struct page *page, unsigned origin)
                 RETURN(ERR_PTR(-ENOMEM));
         llap->llap_magic = LLAP_MAGIC;
         llap->llap_cookie = (void *)llap + size_round(sizeof(*llap));
+
         rc = obd_prep_async_page(exp, ll_i2info(inode)->lli_smd, NULL, page,
                                  (obd_off)page->index << PAGE_SHIFT,
                                  &ll_async_page_ops, llap, &llap->llap_cookie);
@@ -624,6 +641,7 @@ int ll_commit_write(struct file *file, struct page *page, unsigned from,
         struct lov_stripe_md *lsm = lli->lli_smd;
         struct obd_export *exp;
         struct ll_async_page *llap;
+        struct ll_uctxt ctxt;
         loff_t size;
         int rc = 0;
         ENTRY;
@@ -643,6 +661,13 @@ int ll_commit_write(struct file *file, struct page *page, unsigned from,
         if (exp == NULL)
                 RETURN(-EINVAL);
 
+        /* set user credit information for this page */
+        llap->llap_ouc.ouc_fsuid = current->fsuid;
+        llap->llap_ouc.ouc_fsgid = current->fsgid;
+        llap->llap_ouc.ouc_cap = current->cap_effective;
+        ll_i2uctxt(&ctxt, inode, NULL);
+        llap->llap_ouc.ouc_suppgid1 = ctxt.gid1;
+
         /* queue a write for some time in the future the first time we
          * dirty the page */
         if (!PageDirty(page)) {
index 7a52d72..6537be4 100644 (file)
  * Unset cookies should be all-zero (which will never occur naturally). */
 static int lov_llog_origin_add(struct llog_ctxt *ctxt,
                         struct llog_rec_hdr *rec, struct lov_stripe_md *lsm,
-                        struct llog_cookie *logcookies, int numcookies)
+                        struct llog_cookie *logcookies, int numcookies,
+                        llog_fill_rec_cb_t fill_cb)
 {
         struct obd_device *obd = ctxt->loc_obd;
         struct lov_obd *lov = &obd->u.lov;
         struct lov_oinfo *loi;
-        struct llog_unlink_rec *lur;
         int i, rc = 0;
         ENTRY;
 
-        OBD_ALLOC(lur, sizeof(*lur));
-        if (!lur)
-                RETURN(-ENOMEM);
-        lur->lur_hdr.lrh_len = lur->lur_tail.lrt_len = sizeof(*lur);
-        lur->lur_hdr.lrh_type = MDS_UNLINK_REC;
-
         LASSERT(logcookies && numcookies >= lsm->lsm_stripe_count);
 
         for (i = 0,loi = lsm->lsm_oinfo; i < lsm->lsm_stripe_count; i++,loi++) {
                 struct obd_device *child = lov->tgts[loi->loi_ost_idx].ltd_exp->exp_obd; 
                 struct llog_ctxt *cctxt = llog_get_context(child, ctxt->loc_idx);
+                struct llog_fill_rec_data data;
+
+                /* fill mds unlink/setattr log record */
+                data.lfd_id = loi->loi_id;
+                data.lfd_ogen = loi->loi_gr;
+                fill_cb(rec, &data);
 
-                lur->lur_oid = loi->loi_id;
-                lur->lur_ogen = loi->loi_gr;
-                rc += llog_add(cctxt, &lur->lur_hdr, NULL, logcookies + rc,
-                                numcookies - rc);
+                rc += llog_add(cctxt, rec, NULL, logcookies + rc,
+                                numcookies - rc, fill_cb);
 
         }
-        OBD_FREE(lur, sizeof(*lur));
 
         RETURN(rc);
 }
@@ -155,7 +152,7 @@ static int lov_llog_repl_cancel(struct llog_ctxt *ctxt, struct lov_stripe_md *ls
         RETURN(rc);
 }
 
-static struct llog_operations lov_unlink_orig_logops = {
+static struct llog_operations lov_mds_ost_orig_logops = {
         lop_add: lov_llog_origin_add,
         lop_connect: lov_llog_origin_connect
 };
@@ -172,8 +169,8 @@ int lov_llog_init(struct obd_device *obd, struct obd_device *tgt,
         int i, rc = 0;
         ENTRY;
 
-        rc = llog_setup(obd, LLOG_UNLINK_ORIG_CTXT, tgt, 0, NULL,
-                        &lov_unlink_orig_logops);
+        rc = llog_setup(obd, LLOG_MDS_OST_ORIG_CTXT, tgt, 0, NULL,
+                        &lov_mds_ost_orig_logops);
         if (rc)
                 RETURN(rc);
 
@@ -205,7 +202,7 @@ int lov_llog_finish(struct obd_device *obd, int count)
 
         /* cleanup our llogs only if the ctxts have been setup
          * (client lov doesn't setup, mds lov does). */
-        ctxt = llog_get_context(obd, LLOG_UNLINK_ORIG_CTXT);
+        ctxt = llog_get_context(obd, LLOG_MDS_OST_ORIG_CTXT);
         if (ctxt)
                 rc = llog_cleanup(ctxt);
 
index 05bc98c..3f212f3 100644 (file)
@@ -1001,6 +1001,55 @@ static int lov_setattr(struct obd_export *exp, struct obdo *src_oa,
         RETURN(rc);
 }
 
+/*
+ * Issue an asynchronous setattr to every stripe object of @lsm.
+ *
+ * For each stripe whose target is active, @src_oa is re-pointed at that
+ * stripe's object id and passed to obd_setattr_async() on the target's
+ * export.  When OBD_MD_FLCOOKIE is set, oti->oti_logcookies is advanced by
+ * one per stripe, including stripes skipped as inactive.
+ *
+ * Only OBD_MD_FLID, OBD_MD_FLUID, OBD_MD_FLGID and OBD_MD_FLCOOKIE may be
+ * set in src_oa->o_valid (asserted).
+ *
+ * Returns -ENODEV if @exp has no obd, otherwise 0 or the first per-stripe
+ * error; later stripes are still attempted after an error.
+ */
+static int lov_setattr_async(struct obd_export *exp, struct obdo *src_oa,
+                       struct lov_stripe_md *lsm, struct obd_trans_info *oti)
+{
+        struct lov_obd *lov;
+        struct lov_oinfo *loi = NULL;
+        int rc = 0, err;
+        obd_id objid = src_oa->o_id;
+        int i;
+        ENTRY;
+
+        ASSERT_LSM_MAGIC(lsm);
+        LASSERT(oti);
+        if (src_oa->o_valid & OBD_MD_FLCOOKIE)
+                LASSERT(oti->oti_logcookies);
+
+        if (!exp || !exp->exp_obd)
+                RETURN(-ENODEV);
+
+        /* support OBD_MD_FLUID, OBD_MD_FLGID and OBD_MD_FLCOOKIE now */
+        LASSERT(!(src_oa->o_valid &  ~(OBD_MD_FLID | OBD_MD_FLUID |
+                                       OBD_MD_FLGID| OBD_MD_FLCOOKIE)));
+        lov = &exp->exp_obd->u.lov;
+
+        loi = lsm->lsm_oinfo;
+        for (i = 0; i < lsm->lsm_stripe_count; i++, loi++) {
+                if (lov->tgts[loi->loi_ost_idx].active == 0) {
+                        CDEBUG(D_HA, "lov idx %d inactive\n", loi->loi_ost_idx);
+                        goto next;
+                }
+
+                src_oa->o_id = loi->loi_id;
+                /* do chown/chgrp on OST asynchronously */
+                err = obd_setattr_async(lov->tgts[loi->loi_ost_idx].ltd_exp,
+                                        src_oa, NULL, oti);
+                if (err) {
+                        /* report the OST index, not the stripe number */
+                        CERROR("error: setattr objid "LPX64" subobj "
+                               LPX64" on OST idx %d: rc = %d\n",
+                               objid, src_oa->o_id, loi->loi_ost_idx, err);
+                        if (!rc)
+                                rc = err;
+                }
+        next:
+                /* the cookie cursor moves even for skipped stripes */
+                if (src_oa->o_valid & OBD_MD_FLCOOKIE)
+                        oti->oti_logcookies++;
+        }
+
+        /* hand the caller's obdo back with the object id it passed in */
+        src_oa->o_id = objid;
+
+        RETURN(rc);
+}
+
 /* FIXME: maybe we'll just make one node the authoritative attribute node, then
  * we can send this 'punch' to just the authoritative node and the nodes
  * that the punch will affect. */
@@ -1253,11 +1302,19 @@ static void lov_ap_completion(void *data, int cmd, struct obdo *oa, int rc)
         lap->lap_caller_ops->ap_completion(lap->lap_caller_data, cmd, oa, rc);
 }
 
+/* Relay a ucred request for a LOV async page to the ops of the layer that
+ * registered the page, passing that layer's own private data. */
+static void lov_ap_get_ucred(void *data, struct obd_ucred *ouc)
+{
+        struct lov_async_page *page = LAP_FROM_COOKIE(data);
+        void *caller_data = page->lap_caller_data;
+
+        page->lap_caller_ops->ap_get_ucred(caller_data, ouc);
+}
+
 static struct obd_async_page_ops lov_async_page_ops = {
         .ap_make_ready =        lov_ap_make_ready,
         .ap_refresh_count =     lov_ap_refresh_count,
         .ap_fill_obdo =         lov_ap_fill_obdo,
         .ap_completion =        lov_ap_completion,
+        .ap_get_ucred =         lov_ap_get_ucred,
 };
 
 int lov_prep_async_page(struct obd_export *exp, struct lov_stripe_md *lsm,
@@ -2093,6 +2150,64 @@ int lov_complete_many(struct obd_export *exp, struct lov_stripe_md *lsm,
 }
 #endif
 
+/*
+ * Run obd_quotacheck() on every active LOV target.  Inactive targets are
+ * skipped; the first error from a target that is still active afterwards is
+ * the return value, and the remaining targets are visited regardless.
+ */
+static int lov_quotacheck(struct obd_export *exp, struct obd_quotactl *oqctl)
+{
+        struct obd_device *obd = class_exp2obd(exp);
+        struct lov_obd *lov = &obd->u.lov;
+        int first_err = 0;
+        int idx;
+        ENTRY;
+
+        for (idx = 0; idx < lov->desc.ld_tgt_count; idx++) {
+                int err;
+
+                if (!lov->tgts[idx].active) {
+                        CDEBUG(D_HA, "lov idx %d inactive\n", idx);
+                        continue;
+                }
+
+                err = obd_quotacheck(lov->tgts[idx].ltd_exp, oqctl);
+                /* keep only the first failure of a still-active target */
+                if (err && lov->tgts[idx].active && !first_err)
+                        first_err = err;
+        }
+
+        RETURN(first_err);
+}
+
+/*
+ * Send a quotactl to every active LOV target.  For Q_GETQUOTA, the
+ * dqb_curspace reported by each successful target is added to the value
+ * passed in, and the total is stored back into @oqctl.  The first error
+ * from a target that is still active afterwards is the return value.
+ */
+static int lov_quotactl(struct obd_export *exp, struct obd_quotactl *oqctl)
+{
+        struct obd_device *obd = class_exp2obd(exp);
+        struct lov_obd *lov = &obd->u.lov;
+        __u64 total = oqctl->qc_dqblk.dqb_curspace;
+        int first_err = 0;
+        int idx;
+        ENTRY;
+
+        for (idx = 0; idx < lov->desc.ld_tgt_count; idx++) {
+                int err;
+
+                if (!lov->tgts[idx].active) {
+                        CDEBUG(D_HA, "lov idx %d inactive\n", idx);
+                        continue;
+                }
+
+                err = obd_quotactl(lov->tgts[idx].ltd_exp, oqctl);
+                if (!err) {
+                        /* accumulate this target's usage */
+                        if (oqctl->qc_cmd == Q_GETQUOTA)
+                                total += oqctl->qc_dqblk.dqb_curspace;
+                } else if (lov->tgts[idx].active && !first_err) {
+                        first_err = err;
+                }
+        }
+
+        if (oqctl->qc_cmd == Q_GETQUOTA)
+                oqctl->qc_dqblk.dqb_curspace = total;
+        RETURN(first_err);
+}
+
 struct obd_ops lov_obd_ops = {
         .o_owner               = THIS_MODULE,
         .o_setup               = lov_setup,
@@ -2108,6 +2223,7 @@ struct obd_ops lov_obd_ops = {
         .o_getattr             = lov_getattr,
         .o_getattr_async       = lov_getattr_async,
         .o_setattr             = lov_setattr,
+        .o_setattr_async       = lov_setattr_async,
         .o_brw                 = lov_brw,
         .o_brw_async           = lov_brw_async,
         .o_prep_async_page     = lov_prep_async_page,
@@ -2131,6 +2247,8 @@ struct obd_ops lov_obd_ops = {
         .o_llog_init           = lov_llog_init,
         .o_llog_finish         = lov_llog_finish,
         .o_notify              = lov_notify,
+        .o_quotacheck          = lov_quotacheck,
+        .o_quotactl            = lov_quotactl,
 };
 
 int __init lov_init(void)
index 791e48d..ac5a8a2 100644 (file)
@@ -1,6 +1,8 @@
-MODULES := lvfs fsfilt_@BACKINGFS@
+MODULES := lvfs fsfilt_@BACKINGFS@ quotactl_test quotacheck_test
 
 lvfs-objs := lvfs_common.o lvfs_linux.o fsfilt.o
+# object lists for the quota test modules
+quotactl-objs := quotactl_test.o
+quotacheck-objs := quotacheck_test.o
 
 ifeq ($(PATCHLEVEL),6)
 fsfilt_@BACKINGFS@-objs := fsfilt-@BACKINGFS@.o
index 0b642f7..814d299 100644 (file)
@@ -42,7 +42,8 @@ sources:
 endif # MODULES
 
 DIST_SOURCES = fsfilt.c fsfilt_ext3.c fsfilt_reiserfs.c lvfs_common.c \
-       lvfs_internal.h lvfs_linux.c lvfs_userfs.c
+       lvfs_internal.h lvfs_linux.c lvfs_userfs.c \
+        quotacheck_test.c quotactl_test.c
 
 MOSTLYCLEANFILES = *.o *.ko *.mod.c
 CLEANFILES = fsfilt-*.c fsfilt_ldiskfs.c fsfilt_extN.c sources
index cd6d693..eb890e5 100644 (file)
@@ -36,6 +36,9 @@
 #include <linux/ext3_jbd.h>
 #include <linux/version.h>
 #include <linux/bitops.h>
+#include <linux/quota.h>
+#include <linux/quotaio_v1.h>
+#include <linux/quotaio_v2.h>
 #if (LINUX_VERSION_CODE < KERNEL_VERSION(2,5,0))
 #include <linux/ext3_xattr.h>
 #else
@@ -46,6 +49,7 @@
 #include <linux/lustre_fsfilt.h>
 #include <linux/obd.h>
 #include <linux/obd_class.h>
+#include <linux/lustre_quota.h>
 #if (LINUX_VERSION_CODE < KERNEL_VERSION(2,5,0))
 #include <linux/iobuf.h>
 #endif
@@ -148,6 +152,8 @@ static void *fsfilt_ext3_start(struct inode *inode, int op, void *desc_private,
         case FSFILT_OP_SETATTR:
                 /* Setattr on inode */
                 nblocks += 1;
+                nblocks += EXT3_INDEX_EXTRA_TRANS_BLOCKS +
+                        EXT3_DATA_TRANS_BLOCKS;
                 break;
         case FSFILT_OP_CANCEL_UNLINK:
                 /* blocks for log header bitmap update OR
@@ -1171,6 +1177,549 @@ static int fsfilt_ext3_get_op_len(int op, struct fsfilt_objinfo *fso, int logs)
         return 0;
 }
 
+static const char *op_quotafile[] = { "aquota.user", "aquota.group" };
+
+/*
+ * Quotactl entry point for the ext3 backing filesystem.
+ *
+ * Currently disabled: the function returns -EOPNOTSUPP unconditionally and
+ * the dispatch loop below is unreachable.  It is kept to show the intended
+ * mapping of each qc_cmd onto the superblock's s_qcop operations for every
+ * quota type selected by Q_TYPESET().
+ */
+static int fsfilt_ext3_quotactl(struct super_block *sb,
+                                struct obd_quotactl *oqc)
+{
+        int i, rc = 0, error = 0;
+        struct if_dqinfo *info = (struct if_dqinfo *)&oqc->qc_dqinfo;
+        struct if_dqblk *dqblk = (struct if_dqblk *)&oqc->qc_dqblk;
+        ENTRY;
+
+        /* XXX: quotaoff */
+        RETURN(-EOPNOTSUPP);
+
+        for (i = 0; i < MAXQUOTAS; i++) if (Q_TYPESET(oqc, i)) {
+                switch (oqc->qc_cmd) {
+                case Q_QUOTAON: {
+                        rc = sb->s_qcop->quota_on(sb, i, oqc->qc_id,
+                                                  (char *)op_quotafile[i]);
+                        /* -EBUSY is remembered, other errors abort */
+                        if (rc == -EBUSY)
+                                error = rc;
+                        else if (rc)
+                                GOTO(out, rc);
+                        break;
+                }
+                case Q_QUOTAOFF: {
+                        sb->s_qcop->quota_off(sb, i);
+                        break;
+                }
+                case Q_GETOINFO:
+                case Q_GETINFO:
+                        rc = sb->s_qcop->get_info(sb, i, info);
+                        GOTO(out, rc);
+                case Q_SETQUOTA:
+                        rc = sb->s_qcop->set_dqblk(sb, i, oqc->qc_id, dqblk);
+                        GOTO(out, rc);
+                case Q_GETOQUOTA:
+                case Q_GETQUOTA:
+                        rc = sb->s_qcop->get_dqblk(sb, i, oqc->qc_id, dqblk);
+                        GOTO(out, rc);
+                default:
+                        CERROR("unsupported quotactl command: %d\n",
+                               oqc->qc_cmd);
+                        LBUG();
+                }
+        }
+out:
+        if (!rc && error)
+                rc = error;
+        RETURN(rc);
+}
+
+/*
+ * Per-(id, type) quota record used while running a quotacheck.  Records are
+ * created by cqget(), which links each one into both a hash bucket and a
+ * list.
+ */
+struct chkquot {
+        /* linkage in the hash bucket chosen by chkquot_hash() */
+        struct hlist_node       cq_hash;
+        /* linkage on the list handed to cqget() */
+        struct list_head        cq_list;
+        /* lookup key: quota id and quota type */
+        qid_t                   cq_id;
+        short                   cq_type;
+        /* block limits, loaded from get_dqblk in cqget() unless first_check */
+        __u32                   cq_bhardlimit;
+        __u32                   cq_bsoftlimit;
+        /* presumably the space usage accumulated by the check -- TODO confirm */
+        qsize_t                 cq_curspace;
+        /* inode limits, loaded from get_dqblk in cqget() unless first_check */
+        __u32                   cq_ihardlimit;
+        __u32                   cq_isoftlimit;
+        /* presumably the inode count accumulated by the check -- TODO confirm */
+        __u32                   cq_curinodes;
+        /* block/inode times, loaded from get_dqblk in cqget() unless first_check */
+        __u64                   cq_btime;
+        __u64                   cq_itime;
+};
+                                                                                                                 
+/*
+ * Hash a (quota id, quota type) pair: the id is scaled by
+ * (MAXQUOTAS - type) and reduced modulo NR_DQHASH.
+ */
+static inline unsigned int
+chkquot_hash(qid_t id, int type)
+{
+        return (id * (MAXQUOTAS - type)) % NR_DQHASH;
+}
+
+/* Search one hash bucket for the record matching both @id and @type;
+ * returns it, or NULL when the bucket holds no such record. */
+static inline struct chkquot *
+find_chkquot(struct hlist_head *head, qid_t id, int type)
+{
+        struct hlist_node *pos, *tmp;
+
+        hlist_for_each_safe(pos, tmp, head) {
+                struct chkquot *entry;
+
+                entry = hlist_entry(pos, struct chkquot, cq_hash);
+                if (entry->cq_id != id || entry->cq_type != type)
+                        continue;
+                return entry;
+        }
+
+        return NULL;
+}
+                                                                                                                 
+/* Allocate a chkquot record keyed by (@id, @type) with its list linkage
+ * initialised; returns NULL if the allocation fails. */
+static struct chkquot *alloc_chkquot(qid_t id, int type)
+{
+        struct chkquot *entry;
+
+        OBD_ALLOC(entry, sizeof(*entry));
+        if (!entry)
+                return NULL;
+
+#if 0           /* XXX: 2.4 doesn't support this macro */
+        INIT_HLIST_NODE(&entry->cq_hash);
+#endif
+        INIT_LIST_HEAD(&entry->cq_list);
+        entry->cq_id = id;
+        entry->cq_type = type;
+
+        return entry;
+}
+                                                                                                                 
+/*
+ * Look up the record for (@id, @type) in @hash, creating it on a miss.
+ *
+ * A new record is linked at the head of its hash bucket and at the tail of
+ * @list.  Unless @first_check is set, its limits and times are seeded from
+ * the filesystem via s_qcop->get_dqblk; a failure of that call is ignored
+ * and the fields are simply not filled in.
+ *
+ * Returns the record, or NULL if a new one could not be allocated.
+ */
+static struct chkquot *
+cqget(struct super_block *sb, struct hlist_head *hash, struct list_head *list,
+      qid_t id, int type, int first_check)
+{
+        struct hlist_head *bucket = hash + chkquot_hash(id, type);
+        struct chkquot *entry;
+
+        entry = find_chkquot(bucket, id, type);
+        if (entry != NULL)
+                return entry;
+
+        entry = alloc_chkquot(id, type);
+        if (entry == NULL)
+                return NULL;
+
+        if (!first_check) {
+                struct if_dqblk dqb;
+
+                if (sb->s_qcop->get_dqblk(sb, type, id, &dqb) == 0) {
+                        cq_seed:
+                        entry->cq_bhardlimit = dqb.dqb_bhardlimit;
+                        entry->cq_bsoftlimit = dqb.dqb_bsoftlimit;
+                        entry->cq_ihardlimit = dqb.dqb_ihardlimit;
+                        entry->cq_isoftlimit = dqb.dqb_isoftlimit;
+                        entry->cq_btime = dqb.dqb_btime;
+                        entry->cq_itime = dqb.dqb_itime;
+                }
+        }
+
+        hlist_add_head(&entry->cq_hash, bucket);
+        list_add_tail(&entry->cq_list, list);
+
+        return entry;
+}
+
+/*
+ * Issue quota command @cmd for quota @type on @sb through
+ * fsfilt_ext3_quotactl(), passing QFMT_LDISKFS as the qc_id.
+ *
+ * Returns -ENOMEM if the temporary obd_quotactl cannot be allocated,
+ * otherwise whatever fsfilt_ext3_quotactl() returns.
+ */
+static inline int quota_onoff(struct super_block *sb, int cmd, int type)
+{
+        struct obd_quotactl *oqctl;
+        int rc;
+
+        OBD_ALLOC(oqctl, sizeof(*oqctl));
+        /* allocation can fail; do not write through a NULL pointer */
+        if (oqctl == NULL)
+                return -ENOMEM;
+
+        oqctl->qc_cmd = cmd;
+        oqctl->qc_id = QFMT_LDISKFS;
+        oqctl->qc_type = type;
+        rc = fsfilt_ext3_quotactl(sb, oqctl);
+
+        OBD_FREE(oqctl, sizeof(*oqctl));
+        return rc;
+}
+
+/*
+ * Fetch the current quota info for @type with a Q_GETINFO quotactl and, on
+ * success, copy it into slot @type of the @dqinfo array.  On any failure,
+ * including failure to allocate the temporary request, the slot is left
+ * untouched.
+ */
+static inline void read_old_dqinfo(struct super_block *sb, int type,
+                            struct if_dqinfo *dqinfo)
+{
+        struct obd_quotactl *oqctl;
+        int rc;
+        ENTRY;
+
+        OBD_ALLOC(oqctl, sizeof(*oqctl));
+        /* allocation can fail; do not write through a NULL pointer */
+        if (oqctl == NULL) {
+                EXIT;
+                return;
+        }
+
+        oqctl->qc_cmd = Q_GETINFO;
+        oqctl->qc_type = type;
+        rc = fsfilt_ext3_quotactl(sb, oqctl);
+        if (!rc)
+                memcpy(dqinfo + type, &oqctl->qc_dqinfo, sizeof(*dqinfo));
+
+        OBD_FREE(oqctl, sizeof(*oqctl));
+        EXIT;
+}
+
+/* Return the descriptor of block group @group: select the descriptor block
+ * that holds it from the superblock's s_group_desc buffers, then index to
+ * the entry inside that block. */
+static inline struct ext3_group_desc *
+get_group_desc(struct super_block *sb, int group)
+{
+        unsigned long blk = group / EXT3_DESC_PER_BLOCK(sb);
+        unsigned long slot = group % EXT3_DESC_PER_BLOCK(sb);
+
+        return (struct ext3_group_desc *)
+               EXT3_SB(sb)->s_group_desc[blk]->b_data + slot;
+}
+
+/*
+ * Read the inode bitmap block of block group @group.
+ * Returns whatever sb_bread() returns for that block.
+ */
+static inline struct buffer_head *
+read_inode_bitmap(struct super_block *sb, unsigned long group)
+{
+        struct ext3_group_desc *gdp = get_group_desc(sb, group);
+
+        return sb_bread(sb, le32_to_cpu(gdp->bg_inode_bitmap));
+}
+
+/*
+ * Return iget(@sb, @ino) if bit @index is set in the inode bitmap held by
+ * @bitmap_bh, otherwise NULL.
+ */
+static inline struct inode *ext3_iget_inuse(struct super_block *sb,
+                                     struct buffer_head *bitmap_bh,
+                                     int index, unsigned long ino)
+{
+        /* bit clear: inode is not marked in use */
+        if (!ext3_test_bit(index, bitmap_bh->b_data))
+                return NULL;
+
+        return iget(sb, ino);
+}
+
+/* Working state of one fsfilt_ext3_quotacheck() run. */
+struct qchk_ctxt {
+        struct hlist_head       hash[NR_DQHASH];        /* hash heads handed to cqget() */
+        struct list_head        list;                   /* all chkquot records, walked by prune_chkquots() */
+        int                     first_check[MAXQUOTAS]; /* set to 1 when quota-on returned -ENOENT */
+        struct if_dqinfo        dqinfo[MAXQUOTAS];      /* per-type dqinfo filled by read_old_dqinfo() */
+};
+
+/*
+ * Account @inode against the user and group records of the running
+ * quotacheck.
+ *
+ * For every quota type selected in @oqc the matching chkquot is obtained
+ * with cqget() and charged one inode plus, for directories, regular files
+ * and symlinks, inode_get_bytes() of space.  A NULL @inode is ignored.
+ *
+ * Returns 0 on success or -ENOMEM when cqget() returns NULL.
+ */
+static int add_inode_quota(struct inode *inode, struct qchk_ctxt *qctxt,
+                           struct obd_quotactl *oqc)
+{
+        qid_t ids[MAXQUOTAS];
+        struct chkquot *entry;
+        loff_t bytes = 0;
+        int type;
+
+        if (inode == NULL)
+                return 0;
+
+        /* only these inode kinds are charged for space */
+        if (S_ISDIR(inode->i_mode) ||
+            S_ISREG(inode->i_mode) ||
+            S_ISLNK(inode->i_mode))
+                bytes = inode_get_bytes(inode);
+
+        ids[USRQUOTA] = inode->i_uid;
+        ids[GRPQUOTA] = inode->i_gid;
+
+        for (type = 0; type < MAXQUOTAS; type++) {
+                if (!Q_TYPESET(oqc, type))
+                        continue;
+
+                entry = cqget(inode->i_sb, qctxt->hash, &qctxt->list,
+                              ids[type], type, qctxt->first_check[type]);
+                if (entry == NULL)
+                        return -ENOMEM;
+
+                entry->cq_curspace += bytes;
+                entry->cq_curinodes++;
+        }
+
+        return 0;
+}
+
+/*
+ * Write the v2 quota file header (magic and version for quota @type) at
+ * offset 0 of @f.
+ *
+ * The write goes through f->f_op->write with the address limit switched to
+ * KERNEL_DS because the buffer lives on the kernel stack.
+ *
+ * Returns 0 on success or -EIO if the header was not written completely.
+ */
+static int v2_write_dqheader(struct file *f, int type)
+{
+        int quota_magics[] = V2_INITQMAGICS;
+        int quota_versions[] = V2_INITQVERSIONS;
+        struct v2_disk_dqheader dqhead;
+        ssize_t size;
+        loff_t offset = 0;
+        mm_segment_t fs;
+
+        dqhead.dqh_magic = cpu_to_le32(quota_magics[type]);
+        dqhead.dqh_version = cpu_to_le32(quota_versions[type]);
+
+        fs = get_fs();
+        set_fs(KERNEL_DS);
+        size = f->f_op->write(f, (char *)&dqhead, sizeof(dqhead), &offset);
+        set_fs(fs);
+        if (size != sizeof(dqhead)) {
+                /* message needs its own newline so the log line is flushed */
+                CERROR("error writing dqhead in quota file\n");
+                return -EIO;
+        }
+
+        return 0;
+}
+
+/*
+ * Write the dqinfo struct of a new quota file at offset V2_DQINFOOFF of @f.
+ *
+ * When @info is given, its grace times and flags (masked with DQF_MASK and
+ * with DQF_INFO_DIRTY cleared) are stored; otherwise MAX_DQ_TIME and
+ * MAX_IQ_TIME are used with no flags.  The block count is set to
+ * V2_DQTREEOFF + 1 and both free lists start empty.
+ *
+ * Returns 0 on success or -EIO if the struct was not written completely.
+ */
+static int v2_write_dqinfo(struct file *f, int type, struct if_dqinfo *info)
+{
+        struct v2_disk_dqinfo dqinfo;
+        int blocks = V2_DQTREEOFF + 1;
+        ssize_t size;
+        loff_t offset = V2_DQINFOOFF;
+        mm_segment_t fs;
+
+        if (info) {
+                dqinfo.dqi_bgrace = cpu_to_le32(info->dqi_bgrace);
+                dqinfo.dqi_igrace = cpu_to_le32(info->dqi_igrace);
+                dqinfo.dqi_flags = cpu_to_le32(info->dqi_flags & DQF_MASK &
+                                               ~DQF_INFO_DIRTY);
+        } else {
+                dqinfo.dqi_bgrace = cpu_to_le32(MAX_DQ_TIME);
+                dqinfo.dqi_igrace = cpu_to_le32(MAX_IQ_TIME);
+                dqinfo.dqi_flags = 0;
+        }
+
+        dqinfo.dqi_blocks = cpu_to_le32(blocks);
+        dqinfo.dqi_free_blk = 0;
+        dqinfo.dqi_free_entry = 0;
+
+        /* buffer is a kernel address, so lift the user-space limit */
+        fs = get_fs();
+        set_fs(KERNEL_DS);
+        size = f->f_op->write(f, (char *)&dqinfo, sizeof(dqinfo), &offset);
+        set_fs(fs);
+
+        if (size != sizeof(dqinfo)) {
+                /* message needs its own newline so the log line is flushed */
+                CERROR("error writing dqinfo in quota file\n");
+                return -EIO;
+        }
+
+        return 0;
+}
+
+/*
+ * (Re)create the operational quota file of every quota type selected in
+ * @oqc and write a fresh header and dqinfo into it.
+ *
+ * The dqinfo saved in @qctxt is reused unless the type is marked as a
+ * first check, in which case v2_write_dqinfo() gets NULL.
+ *
+ * Returns 0 on success, the filp_open() error, or -EIO when writing the
+ * header or dqinfo fails.
+ */
+static int create_new_quota_files(struct qchk_ctxt *qctxt,
+                                  struct obd_quotactl *oqc)
+{
+        int type, rc = 0;
+        ENTRY;
+
+        for (type = 0; type < MAXQUOTAS; type++) {
+                struct if_dqinfo *old_info;
+                struct file *qfile;
+
+                if (!Q_TYPESET(oqc, type))
+                        continue;
+
+                if (qctxt->first_check[type])
+                        old_info = NULL;
+                else
+                        old_info = &qctxt->dqinfo[type];
+
+                qfile = filp_open(op_quotafile[type],
+                                  O_RDWR | O_CREAT | O_TRUNC, 0644);
+                if (IS_ERR(qfile)) {
+                        rc = PTR_ERR(qfile);
+                        CERROR("can't create %s file: rc = %d\n",
+                               op_quotafile[type], rc);
+                        GOTO(out, rc);
+                }
+
+                /* header first, dqinfo only if the header went out */
+                rc = v2_write_dqheader(qfile, type);
+                if (!rc)
+                        rc = v2_write_dqinfo(qfile, type, old_info);
+
+                filp_close(qfile, 0);
+                if (rc)
+                        GOTO(out, rc = -EIO);
+        }
+
+out:
+        RETURN(rc);
+}
+
+
+/*
+ * Push one collected chkquot record to the filesystem with Q_SETQUOTA.
+ *
+ * If a soft limit is set, current usage has reached it and no grace
+ * deadline is recorded yet, the deadline is started from the current time
+ * plus the grace period saved in @qctxt for that quota type.
+ *
+ * Returns the fsfilt_ext3_quotactl() result.
+ */
+static int commit_chkquot(struct super_block *sb, struct qchk_ctxt *qctxt,
+                          struct chkquot *cq)
+{
+        struct if_dqinfo *grace = &qctxt->dqinfo[cq->cq_type];
+        struct obd_quotactl oqc = { 0, };
+        struct timeval now;
+
+        do_gettimeofday(&now);
+
+        /* start the block grace period if over the soft limit */
+        if (cq->cq_bsoftlimit && !cq->cq_btime &&
+            toqb(cq->cq_curspace) >= cq->cq_bsoftlimit)
+                cq->cq_btime = now.tv_sec + grace->dqi_bgrace;
+
+        /* likewise for inodes */
+        if (cq->cq_isoftlimit && !cq->cq_itime &&
+            cq->cq_curinodes >= cq->cq_isoftlimit)
+                cq->cq_itime = now.tv_sec + grace->dqi_igrace;
+
+        oqc.qc_cmd = Q_SETQUOTA;
+        oqc.qc_id = cq->cq_id;
+        oqc.qc_type = cq->cq_type;
+
+        oqc.qc_dqblk.dqb_valid = QIF_ALL;
+        oqc.qc_dqblk.dqb_bhardlimit = cq->cq_bhardlimit;
+        oqc.qc_dqblk.dqb_bsoftlimit = cq->cq_bsoftlimit;
+        oqc.qc_dqblk.dqb_curspace = cq->cq_curspace;
+        oqc.qc_dqblk.dqb_btime = cq->cq_btime;
+        oqc.qc_dqblk.dqb_ihardlimit = cq->cq_ihardlimit;
+        oqc.qc_dqblk.dqb_isoftlimit = cq->cq_isoftlimit;
+        oqc.qc_dqblk.dqb_curinodes = cq->cq_curinodes;
+        oqc.qc_dqblk.dqb_itime = cq->cq_itime;
+
+        return fsfilt_ext3_quotactl(sb, &oqc);
+}
+
+/*
+ * Commit and free every chkquot record collected in @qctxt.
+ *
+ * Records are committed with commit_chkquot() only while no error has been
+ * seen (@error is the status on entry); after the first failure the
+ * remaining records are just freed.
+ *
+ * Returns @error, or the first commit failure.
+ */
+static int prune_chkquots(struct super_block *sb,
+                          struct qchk_ctxt *qctxt, int error)
+{
+        struct chkquot *entry, *next;
+
+        list_for_each_entry_safe(entry, next, &qctxt->list, cq_list) {
+                if (error == 0)
+                        error = commit_chkquot(sb, qctxt, entry);
+
+                /* unlink from both the hash chain and the global list */
+                hlist_del_init(&entry->cq_hash);
+                list_del(&entry->cq_list);
+                OBD_FREE(entry, sizeof(*entry));
+        }
+
+        return error;
+}
+
+/*
+ * Recompute quota usage for @sb by scanning every in-use inode and store
+ * the result in freshly created quota files.
+ *
+ * Steps:
+ *  1. turn quota on for each type selected in @oqc and read the old dqinfo,
+ *     or mark the type as a first check when quota-on returns -ENOENT;
+ *  2. walk the inode bitmap of every block group and account each in-use
+ *     inode with add_inode_quota();
+ *  3. turn quota off, recreate the quota files, turn quota on again and
+ *     commit the collected records through prune_chkquots();
+ *  4. turn quota off again.
+ *
+ * The final status is stored in oqc->qc_stat and returned (0 or -errno).
+ */
+static int fsfilt_ext3_quotacheck(struct super_block *sb,
+                                  struct obd_quotactl *oqc)
+{
+        struct ext3_sb_info *sbi = EXT3_SB(sb);
+        int i, group;
+        struct qchk_ctxt *qctxt;
+        struct buffer_head *bitmap_bh = NULL;
+        unsigned long ino;
+        struct inode *inode;
+        int rc;
+        ENTRY;
+
+        /* turn on quota and read dqinfo if existed */
+        OBD_ALLOC(qctxt, sizeof(*qctxt));
+        if (!qctxt)
+                RETURN(-ENOMEM);
+
+        for (i = 0; i < NR_DQHASH; i++)
+                INIT_HLIST_HEAD(&qctxt->hash[i]);
+        INIT_LIST_HEAD(&qctxt->list);
+
+        for (i = 0; i < MAXQUOTAS; i++) if (Q_TYPESET(oqc, i)) {
+                rc = quota_onoff(sb, Q_QUOTAON, i);
+                if (!rc || rc == -EBUSY)
+                        read_old_dqinfo(sb, i, qctxt->dqinfo);
+                else if (rc == -ENOENT)
+                        qctxt->first_check[i] = 1;
+                else if (rc)
+                        GOTO(out, rc);
+        }
+
+        /* check quota and update in hash */
+        for (group = 0; group < sbi->s_groups_count; group++) {
+                ino = group * sbi->s_inodes_per_group + 1;
+                brelse(bitmap_bh);
+                bitmap_bh = read_inode_bitmap(sb, group);
+                if (!bitmap_bh) {
+                        /* the bitmap block could not be read; it must not
+                         * be dereferenced by ext3_iget_inuse() below */
+                        CERROR("can't read inode bitmap of group %d\n",
+                               group);
+                        GOTO(out, rc = -EIO);
+                }
+
+                for (i = 0; i < sbi->s_inodes_per_group; i++, ino++) {
+                        /* skip the reserved inodes */
+                        if (ino < sbi->s_first_ino)
+                                continue;
+
+                        inode = ext3_iget_inuse(sb, bitmap_bh, i, ino);
+                        rc = add_inode_quota(inode, qctxt, oqc);
+                        iput(inode);
+                        if (rc) {
+                                brelse(bitmap_bh);
+                                GOTO(out, rc);
+                        }
+                }
+        }
+        brelse(bitmap_bh);
+
+        /* turn off quota cause we are to dump chkquot to files */
+        quota_onoff(sb, Q_QUOTAOFF, oqc->qc_type);
+
+        rc = create_new_quota_files(qctxt, oqc);
+        if (rc)
+                GOTO(out, rc);
+
+        /* we use vfs functions to set dqblk, so turn quota on */
+        rc = quota_onoff(sb, Q_QUOTAON, oqc->qc_type);
+        if (rc)
+                GOTO(out, rc);
+
+out:
+        /* dump and free chkquot; records are only committed when rc == 0 */
+        rc = prune_chkquots(sb, qctxt, rc);
+        OBD_FREE(qctxt, sizeof(*qctxt));
+
+        /* turn off quota, `lfs quotacheck` will turn on when all
+         * nodes quotacheck finish. */
+        quota_onoff(sb, Q_QUOTAOFF, oqc->qc_type);
+
+        if (rc)
+                CERROR("quotacheck failed: rc = %d\n", rc);
+
+        oqc->qc_stat = rc;
+        RETURN(rc);
+}
+
+/*
+ * Dispatch an admin quota file command @cmd for quota @type to the
+ * matching lustre_*_quota_* helper and return its result.
+ * An unknown command is logged and triggers LBUG().
+ */
+static int fsfilt_ext3_quotainfo(struct lustre_quota_info *lqi, int type, int cmd)
+{
+        ENTRY;
+
+        switch (cmd) {
+        case QFILE_CHK:
+                RETURN(lustre_check_quota_file(lqi, type));
+        case QFILE_RD_INFO:
+                RETURN(lustre_read_quota_info(lqi, type));
+        case QFILE_WR_INFO:
+                RETURN(lustre_write_quota_info(lqi, type));
+        case QFILE_INIT_INFO:
+                RETURN(lustre_init_quota_info(lqi, type));
+        default:
+                CERROR("Unsupported admin quota file cmd %d\n", cmd);
+                LBUG();
+                break;
+        }
+
+        RETURN(0);
+}
+
+/*
+ * Read or write one admin quota record.
+ *
+ * QFILE_RD_DQUOT returns the lustre_read_dquot() result.  QFILE_WR_DQUOT
+ * first sets DQ_FAKE_B when no limit at all is set in the record (clears
+ * it otherwise), then commits the record; a non-negative commit result is
+ * reported as 0.  An unknown command is logged and triggers LBUG().
+ */
+static int fsfilt_ext3_dquot(struct lustre_dquot *dquot, int cmd)
+{
+        int has_limit;
+        int rc = 0;
+        ENTRY;
+
+        switch (cmd) {
+        case QFILE_RD_DQUOT:
+                rc = lustre_read_dquot(dquot);
+                break;
+        case QFILE_WR_DQUOT:
+                has_limit = dquot->dq_dqb.dqb_ihardlimit ||
+                            dquot->dq_dqb.dqb_isoftlimit ||
+                            dquot->dq_dqb.dqb_bhardlimit ||
+                            dquot->dq_dqb.dqb_bsoftlimit;
+                if (!has_limit)
+                        set_bit(DQ_FAKE_B, &dquot->dq_flags);
+                else
+                        clear_bit(DQ_FAKE_B, &dquot->dq_flags);
+
+                rc = lustre_commit_dquot(dquot);
+                if (rc > 0)
+                        rc = 0;
+                break;
+        default:
+                CERROR("Unsupported admin quota file cmd %d\n", cmd);
+                LBUG();
+                break;
+        }
+        RETURN(rc);
+}
+
 static struct fsfilt_operations fsfilt_ext3_ops = {
         .fs_type                = "ext3",
         .fs_owner               = THIS_MODULE,
@@ -1194,6 +1743,10 @@ static struct fsfilt_operations fsfilt_ext3_ops = {
         .fs_setup               = fsfilt_ext3_setup,
         .fs_send_bio            = fsfilt_ext3_send_bio,
         .fs_get_op_len          = fsfilt_ext3_get_op_len,
+        .fs_quotactl            = fsfilt_ext3_quotactl,
+        .fs_quotacheck          = fsfilt_ext3_quotacheck,
+        .fs_quotainfo           = fsfilt_ext3_quotainfo,
+        .fs_dquot               = fsfilt_ext3_dquot,
 };
 
 static int __init fsfilt_ext3_init(void)
index b463b88..054e6ec 100644 (file)
 
 #include <linux/obd.h>
 #include <linux/lustre_lib.h>
+#include <linux/lustre_quota.h>
 
 atomic_t obd_memory;
 int obd_memmax;
 
-
 /* Debugging check only needed during development */
 #ifdef OBD_CTXT_DEBUG
 # define ASSERT_CTXT_MAGIC(magic) LASSERT((magic) == OBD_RUN_CTXT_MAGIC)
diff --git a/lustre/lvfs/quotacheck_test.c b/lustre/lvfs/quotacheck_test.c
new file mode 100644 (file)
index 0000000..0791981
--- /dev/null
@@ -0,0 +1,230 @@
+/* -*- mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*-
+ * vim:expandtab:shiftwidth=8:tabstop=8:
+ *
+ *  Copyright (C) 2003 Cluster File Systems, Inc.
+ *   Author: Lai Siyao <lsy@clusterfs.com>
+ *
+ *   This file is part of Lustre, http://www.lustre.org/
+ *
+ *   Lustre is free software; you can redistribute it and/or
+ *   modify it under the terms of version 2 of the GNU General Public
+ *   License as published by the Free Software Foundation.
+ *
+ *   Lustre is distributed in the hope that it will be useful,
+ *   but WITHOUT ANY WARRANTY; without even the implied warranty of
+ *   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ *   GNU General Public License for more details.
+ *
+ *   You should have received a copy of the GNU General Public License
+ *   along with Lustre; if not, write to the Free Software
+ *   Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
+ *
+ * A kernel module which tests the fsfilt quotacheck API from the OBD setup function.
+ */
+
+#ifndef EXPORT_SYMTAB
+# define EXPORT_SYMTAB
+#endif
+#define DEBUG_SUBSYSTEM S_CLASS
+
+#include <linux/module.h>
+#include <linux/init.h>
+#include <linux/fs.h>
+#include <linux/jbd.h>
+#include <linux/slab.h>
+#include <linux/pagemap.h>
+#include <linux/quotaops.h>
+#include <linux/ext3_fs.h>
+#include <linux/ext3_jbd.h>
+#include <linux/version.h>
+#include <linux/bitops.h>
+
+#include <linux/obd_class.h>
+#include <linux/lustre_fsfilt.h>
+#include <linux/lustre_mds.h>
+#include <linux/obd_ost.h>
+
+char *test_quotafile[] = {"aquotacheck.user", "aquotacheck.group"};
+
+/*
+ * Return the in-memory group descriptor of block group @group, looked up
+ * in the descriptor buffers cached in the ext3 superblock info.
+ */
+static inline struct ext3_group_desc *
+get_group_desc(struct super_block *sb, int group)
+{
+        /* which descriptor buffer, and which slot inside that buffer */
+        unsigned long blk = group / EXT3_DESC_PER_BLOCK(sb);
+        unsigned long slot = group % EXT3_DESC_PER_BLOCK(sb);
+        struct ext3_group_desc *first;
+
+        first = (struct ext3_group_desc *)
+                EXT3_SB(sb)->s_group_desc[blk]->b_data;
+
+        return &first[slot];
+}
+
+/*
+ * Read the inode bitmap block of block group @group.
+ * Returns whatever sb_bread() returns for that block.
+ */
+static inline struct buffer_head *
+read_inode_bitmap(struct super_block *sb, unsigned long group)
+{
+        struct ext3_group_desc *gdp = get_group_desc(sb, group);
+
+        return sb_bread(sb, le32_to_cpu(gdp->bg_inode_bitmap));
+}
+
+/*
+ * If bit @index is set in the inode bitmap held by @bitmap_bh, log the
+ * index and inode number, call ll_sleep(1) and return iget(@sb, @ino);
+ * otherwise return NULL.
+ */
+static inline struct inode *ext3_iget_inuse(struct super_block *sb,
+                                     struct buffer_head *bitmap_bh,
+                                     int index, unsigned long ino)
+{
+        /* bit clear: inode is not marked in use */
+        if (!ext3_test_bit(index, bitmap_bh->b_data))
+                return NULL;
+
+        CERROR("i: %d, ino: %lu\n", index, ino);
+        ll_sleep(1);
+
+        return iget(sb, ino);
+}
+
+/*
+ * Log inode number, uid, i_size, i_blocks and the byte count that would be
+ * charged to quota (inode_get_bytes() for directories, regular files and
+ * symlinks, 0 for everything else).
+ */
+static void print_inode(struct inode *inode)
+{
+        int charged = S_ISDIR(inode->i_mode) ||
+                      S_ISREG(inode->i_mode) ||
+                      S_ISLNK(inode->i_mode);
+        loff_t bytes = 0;
+
+        if (charged)
+                bytes = inode_get_bytes(inode);
+
+        CERROR("%lu: uid: %u, size: %llu, blocks: %lu, real size: %llu\n",
+               inode->i_ino, inode->i_uid, inode->i_size, inode->i_blocks,
+               bytes);
+}
+
+/*
+ * Test the inode scan used by quotacheck: walk the inode bitmap of every
+ * block group of @sb and print each in-use, non-reserved inode.
+ *
+ * Returns 0 on success, -EIO when an inode bitmap cannot be read, or
+ * -E2BIG when an inode number beyond s_inodes_count is reached.
+ */
+static int quotacheck_test_1(struct obd_device *obd, struct super_block *sb)
+{
+        struct ext3_sb_info *sbi = EXT3_SB(sb);
+        struct buffer_head *bitmap_bh = NULL;
+        struct inode *inode;
+        unsigned long ino;
+        int i, group;
+        ENTRY;
+
+        for (group = 0; group < sbi->s_groups_count; group++) {
+                ino = group * sbi->s_inodes_per_group + 1;
+                brelse(bitmap_bh);
+                bitmap_bh = read_inode_bitmap(sb, group);
+                if (!bitmap_bh) {
+                        /* the bitmap block could not be read; it must not
+                         * be dereferenced by ext3_iget_inuse() below */
+                        CERROR("can't read inode bitmap of group %d\n",
+                               group);
+                        RETURN(-EIO);
+                }
+
+                if (group == 0)
+                        CERROR("groups_count: %lu, inodes_per_group: %lu, first_ino: %u, inodes_count: %u\n",
+                               sbi->s_groups_count, sbi->s_inodes_per_group,
+                               sbi->s_first_ino, le32_to_cpu(sbi->s_es->s_inodes_count));
+
+                for (i = 0; i < sbi->s_inodes_per_group; i++, ino++) {
+                        /* skip the reserved inodes */
+                        if (ino < sbi->s_first_ino)
+                                continue;
+                        if (ino > le32_to_cpu(sbi->s_es->s_inodes_count)) {
+                                CERROR("bad inode number: %lu > s_inodes_count\n", ino);
+                                brelse(bitmap_bh);
+                                RETURN(-E2BIG);
+                        }
+                        inode = ext3_iget_inuse(sb, bitmap_bh, i, ino);
+                        if (inode)
+                                print_inode(inode);
+                        iput(inode);
+                }
+        }
+        brelse(bitmap_bh);
+
+        RETURN(0);
+}
+
+/* -------------------------------------------------------------------------
+ * Tests above, boring obd functions below
+ * ------------------------------------------------------------------------- */
+/*
+ * Pick the backing super block of target @tgt (an mds or obdfilter device)
+ * and run the quotacheck tests against it.
+ *
+ * Returns the test result, or -EINVAL for any other target type.
+ */
+static int quotacheck_run_tests(struct obd_device *obd, struct obd_device *tgt)
+{
+        struct super_block *sb;
+        int rc;
+        ENTRY;
+
+        if (!strcmp(tgt->obd_type->typ_name, LUSTRE_MDS_NAME))
+                sb = tgt->u.mds.mds_sb;
+        else if (!strcmp(tgt->obd_type->typ_name, "obdfilter"))
+                sb = tgt->u.filter.fo_sb;
+        else {
+                CERROR("TARGET OBD should be mds or ost\n");
+                RETURN(-EINVAL);
+        }
+
+        rc = quotacheck_test_1(tgt, sb);
+
+        /* pair ENTRY with RETURN like every other exit in this file */
+        RETURN(rc);
+}
+
+/* OBD cleanup method: drop the lprocfs state of @obd; always returns 0. */
+static int quotacheck_test_cleanup(struct obd_device *obd)
+{
+        lprocfs_obd_cleanup(obd);
+        return 0;
+}
+
+/*
+ * OBD setup method: look up the target device named in config buffer 1 of
+ * @buf, run the quotacheck tests against it and, if they pass, register
+ * the lprocfs entries of @obd.
+ *
+ * Returns 0 on success, -EINVAL when the target name is missing or the
+ * target is not attached and set up, or the test failure code.
+ */
+static int quotacheck_test_setup(struct obd_device *obd, obd_count len, void *buf)
+{
+        struct lprocfs_static_vars lvars;
+        struct lustre_cfg *lcfg = buf;
+        struct obd_device *tgt;
+        int rc;
+        ENTRY;
+
+        /* the target name is read from buffer index 1, so at least two
+         * buffers must be present */
+        if (lcfg->lcfg_bufcount < 2) {
+                CERROR("requires a mds OBD name\n");
+                RETURN(-EINVAL);
+        }
+
+        tgt = class_name2obd(lustre_cfg_string(lcfg, 1));
+        if (!tgt || !tgt->obd_attached || !tgt->obd_set_up) {
+                CERROR("target device not attached or not set up (%s)\n",
+                       lustre_cfg_string(lcfg, 1));
+                RETURN(-EINVAL);
+        }
+
+        rc = quotacheck_run_tests(obd, tgt);
+        if (rc)
+                /* nothing has been registered yet, so there is nothing to
+                 * clean up; do not register lprocfs for a failed setup.
+                 * NOTE(review): presumably o_cleanup is not invoked after a
+                 * failed o_setup - confirm against the obd class code. */
+                RETURN(rc);
+
+        lprocfs_init_vars(quotacheck_test, &lvars);
+        lprocfs_obd_setup(obd, lvars.obd_vars);
+
+        RETURN(0);
+}
+
+static struct obd_ops quotacheck_obd_ops = {
+        .o_owner       = THIS_MODULE,
+        .o_setup       = quotacheck_test_setup,
+        .o_cleanup     = quotacheck_test_cleanup,
+};
+
+static struct lprocfs_vars lprocfs_obd_vars[] = { {0} };
+static struct lprocfs_vars lprocfs_module_vars[] = { {0} };
+LPROCFS_INIT_VARS(quotacheck_test, lprocfs_module_vars, lprocfs_obd_vars)
+
+/* Module init: register the "quotacheck_test" OBD type with its ops and
+ * module-level lprocfs variables. */
+static int __init quotacheck_test_init(void)
+{
+        struct lprocfs_static_vars lvars;
+
+        lprocfs_init_vars(quotacheck_test, &lvars);
+        return class_register_type(&quotacheck_obd_ops, lvars.module_vars,
+                                   "quotacheck_test");
+}
+
+/* Module exit: unregister the "quotacheck_test" OBD type. */
+static void __exit quotacheck_test_exit(void)
+{
+        class_unregister_type("quotacheck_test");
+}
+
+MODULE_AUTHOR("Cluster File Systems, Inc. <info@clusterfs.com>");
+MODULE_DESCRIPTION("quotacheck test module");
+MODULE_LICENSE("GPL");
+
+module_init(quotacheck_test_init);
+module_exit(quotacheck_test_exit);
diff --git a/lustre/lvfs/quotactl_test.c b/lustre/lvfs/quotactl_test.c
new file mode 100644 (file)
index 0000000..c05efde
--- /dev/null
@@ -0,0 +1,374 @@
+/* -*- mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*-
+ * vim:expandtab:shiftwidth=8:tabstop=8:
+ *
+ *  Copyright (C) 2003 Cluster File Systems, Inc.
+ *   Author: Lai Siyao <lsy@clusterfs.com>
+ *
+ *   This file is part of Lustre, http://www.lustre.org/
+ *
+ *   Lustre is free software; you can redistribute it and/or
+ *   modify it under the terms of version 2 of the GNU General Public
+ *   License as published by the Free Software Foundation.
+ *
+ *   Lustre is distributed in the hope that it will be useful,
+ *   but WITHOUT ANY WARRANTY; without even the implied warranty of
+ *   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ *   GNU General Public License for more details.
+ *
+ *   You should have received a copy of the GNU General Public License
+ *   along with Lustre; if not, write to the Free Software
+ *   Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
+ *
+ * A kernel module which tests the fsfilt quotactl API from the OBD setup function.
+ */
+
+#ifndef EXPORT_SYMTAB
+# define EXPORT_SYMTAB
+#endif
+#define DEBUG_SUBSYSTEM S_CLASS
+
+#include <linux/module.h>
+#include <linux/init.h>
+
+#include <linux/obd_class.h>
+#include <linux/lustre_fsfilt.h>
+#include <linux/lustre_mds.h>
+#include <linux/obd_ost.h>
+
+char *test_quotafile[] = {"aquotactl.user", "aquotactl.group"};
+
+/*
+ * Test quotaon: turn user and group quota on with format QFMT_LDISKFS.
+ * Returns 0 on success or the fsfilt_quotactl() error.
+ */
+static int quotactl_test_1(struct obd_device *obd, struct super_block *sb)
+{
+        /* zero the request so no stack garbage reaches the filesystem */
+        struct obd_quotactl oqctl = { 0, };
+        int rc;
+        ENTRY;
+
+        oqctl.qc_cmd = Q_QUOTAON;
+        oqctl.qc_id = QFMT_LDISKFS;
+        oqctl.qc_type = UGQUOTA;
+        rc = fsfilt_quotactl(obd, sb, &oqctl);
+        if (rc) {
+                CERROR("1a: quotactl Q_QUOTAON failed: %d\n", rc);
+                RETURN(rc);
+        }
+
+        RETURN(0);
+}
+
+#if 0 /* set/getinfo not supported, this is for cluster-wide quotas */
+/* Test set/getinfo */
+static int quotactl_test_2(struct obd_device *obd, struct super_block *sb)
+{
+        struct obd_quotactl oqctl;
+        int rc;
+        ENTRY;
+
+        oqctl.qc_cmd = Q_SETINFO;
+        oqctl.qc_type = USRQUOTA;
+        oqctl.qc_dqinfo.dqi_bgrace = 1616;
+        oqctl.qc_dqinfo.dqi_igrace = 2828;
+        oqctl.qc_dqinfo.dqi_flags = 0;
+        rc = fsfilt_quotactl(obd, sb, &oqctl);
+        if (rc) {
+                CERROR("2a: quotactl Q_SETINFO failed: %d\n", rc);
+                RETURN(rc);
+        }
+
+        oqctl.qc_cmd = Q_GETINFO;
+        oqctl.qc_type = USRQUOTA;
+        rc = fsfilt_quotactl(obd, sb, &oqctl);
+        if (rc) {
+                CERROR("2b: quotactl Q_GETINFO failed: %d\n", rc);
+                RETURN(rc);
+        }
+        if (oqctl.qc_dqinfo.dqi_bgrace != 1616 ||
+            oqctl.qc_dqinfo.dqi_igrace != 2828 ||
+            oqctl.qc_dqinfo.dqi_flags != 0) {
+                CERROR("2c: quotactl Q_GETINFO get wrong result: %d, %d, %d\n",
+                       oqctl.qc_dqinfo.dqi_bgrace,
+                       oqctl.qc_dqinfo.dqi_igrace,
+                       oqctl.qc_dqinfo.dqi_flags);
+                RETURN(-EINVAL);
+        }
+
+        RETURN(0);
+}
+#endif
+       
+/*
+ * Test set/getquota for user id 500.
+ *
+ * Four rounds of Q_SETQUOTA followed by Q_GETQUOTA are run, setting in turn
+ * the limits (QIF_LIMITS), the usage (QIF_USAGE), the grace times
+ * (QIF_TIMES) and everything at once (QIF_ALL); each round checks that the
+ * values read back match.  After the QIF_ALL round both grace times are
+ * expected to read back as 0.
+ *
+ * Returns 0 on success, the fsfilt_quotactl() error, or -EINVAL when a
+ * value read back is wrong.
+ */
+static int quotactl_test_3(struct obd_device *obd, struct super_block *sb)
+{
+        /* zero the request so no stack garbage reaches the filesystem */
+        struct obd_quotactl oqctl = { 0, };
+        int rc;
+        ENTRY;
+
+        /* round 1: limits */
+        oqctl.qc_cmd = Q_SETQUOTA;
+        oqctl.qc_type = USRQUOTA;
+        oqctl.qc_id = 500;
+        oqctl.qc_dqblk.dqb_bhardlimit = 919;
+        oqctl.qc_dqblk.dqb_bsoftlimit = 818;
+        oqctl.qc_dqblk.dqb_ihardlimit = 616;
+        oqctl.qc_dqblk.dqb_isoftlimit = 515;
+        oqctl.qc_dqblk.dqb_valid = QIF_LIMITS;
+        rc = fsfilt_quotactl(obd, sb, &oqctl);
+        if (rc) {
+                CERROR("3a: quotactl Q_SETQUOTA failed: %d\n", rc);
+                RETURN(rc);
+        }
+
+        oqctl.qc_cmd = Q_GETQUOTA;
+        oqctl.qc_type = USRQUOTA;
+        oqctl.qc_id = 500;
+        rc = fsfilt_quotactl(obd, sb, &oqctl);
+        if (rc) {
+                CERROR("3b: quotactl Q_GETQUOTA failed: %d\n", rc);
+                RETURN(rc);
+        }
+        if (oqctl.qc_dqblk.dqb_bhardlimit != 919 ||
+            oqctl.qc_dqblk.dqb_bsoftlimit != 818 ||
+            oqctl.qc_dqblk.dqb_ihardlimit != 616 ||
+            oqctl.qc_dqblk.dqb_isoftlimit != 515) {
+                CERROR("3c: quotactl Q_GETQUOTA get wrong result: "
+                       "%llu, %llu, %llu, %llu\n",
+                       oqctl.qc_dqblk.dqb_bhardlimit,
+                       oqctl.qc_dqblk.dqb_bsoftlimit,
+                       oqctl.qc_dqblk.dqb_ihardlimit,
+                       oqctl.qc_dqblk.dqb_isoftlimit);
+                RETURN(-EINVAL);
+        }
+
+        /* round 2: usage */
+        oqctl.qc_cmd = Q_SETQUOTA;
+        oqctl.qc_type = USRQUOTA;
+        oqctl.qc_id = 500;
+        oqctl.qc_dqblk.dqb_curspace = 717;
+        oqctl.qc_dqblk.dqb_curinodes = 414;
+        oqctl.qc_dqblk.dqb_valid = QIF_USAGE;
+        rc = fsfilt_quotactl(obd, sb, &oqctl);
+        if (rc) {
+                CERROR("3d: quotactl Q_SETQUOTA failed: %d\n", rc);
+                RETURN(rc);
+        }
+
+        oqctl.qc_cmd = Q_GETQUOTA;
+        oqctl.qc_type = USRQUOTA;
+        oqctl.qc_id = 500;
+        rc = fsfilt_quotactl(obd, sb, &oqctl);
+        if (rc) {
+                CERROR("3e: quotactl Q_GETQUOTA failed: %d\n", rc);
+                RETURN(rc);
+        }
+        if (oqctl.qc_dqblk.dqb_curspace != 717 ||
+            oqctl.qc_dqblk.dqb_curinodes != 414) {
+                CERROR("3f: quotactl Q_GETQUOTA get wrong result: %llu, %llu\n",
+                       oqctl.qc_dqblk.dqb_curspace,
+                       oqctl.qc_dqblk.dqb_curinodes);
+                RETURN(-EINVAL);
+        }
+
+        /* round 3: grace times */
+        oqctl.qc_cmd = Q_SETQUOTA;
+        oqctl.qc_type = USRQUOTA;
+        oqctl.qc_dqblk.dqb_btime = 313;
+        oqctl.qc_dqblk.dqb_itime = 212;
+        oqctl.qc_id = 500;
+        oqctl.qc_dqblk.dqb_valid = QIF_TIMES;
+        rc = fsfilt_quotactl(obd, sb, &oqctl);
+        if (rc) {
+                CERROR("3g: quotactl Q_SETQUOTA failed: %d\n", rc);
+                RETURN(rc);
+        }
+
+        oqctl.qc_cmd = Q_GETQUOTA;
+        oqctl.qc_type = USRQUOTA;
+        oqctl.qc_id = 500;
+        rc = fsfilt_quotactl(obd, sb, &oqctl);
+        if (rc) {
+                CERROR("3h: quotactl Q_GETQUOTA failed: %d\n", rc);
+                RETURN(rc);
+        }
+        if (oqctl.qc_dqblk.dqb_btime != 313 ||
+            oqctl.qc_dqblk.dqb_itime != 212) {
+                CERROR("3i: quotactl Q_GETQUOTA get wrong result: %llu, %llu\n",
+                       oqctl.qc_dqblk.dqb_btime,
+                       oqctl.qc_dqblk.dqb_itime);
+                RETURN(-EINVAL);
+        }
+
+        /* round 4: everything at once */
+        oqctl.qc_cmd = Q_SETQUOTA;
+        oqctl.qc_type = USRQUOTA;
+        oqctl.qc_id = 500;
+        oqctl.qc_dqblk.dqb_bhardlimit = 919;
+        oqctl.qc_dqblk.dqb_bsoftlimit = 818;
+        oqctl.qc_dqblk.dqb_curspace = 717;
+        oqctl.qc_dqblk.dqb_ihardlimit = 616;
+        oqctl.qc_dqblk.dqb_isoftlimit = 515;
+        oqctl.qc_dqblk.dqb_curinodes = 414;
+        oqctl.qc_dqblk.dqb_btime = 313;
+        oqctl.qc_dqblk.dqb_itime = 212;
+        oqctl.qc_dqblk.dqb_valid = QIF_ALL;
+        rc = fsfilt_quotactl(obd, sb, &oqctl);
+        if (rc) {
+                CERROR("3j: quotactl Q_SETQUOTA failed: %d\n", rc);
+                RETURN(rc);
+        }
+
+        oqctl.qc_cmd = Q_GETQUOTA;
+        oqctl.qc_type = USRQUOTA;
+        oqctl.qc_id = 500;
+        rc = fsfilt_quotactl(obd, sb, &oqctl);
+        if (rc) {
+                CERROR("3k: quotactl Q_GETQUOTA failed: %d\n", rc);
+                RETURN(rc);
+        }
+        /* NOTE(review): grace times are expected back as 0 here, presumably
+         * because usage is below the soft limits - confirm */
+        if (oqctl.qc_dqblk.dqb_bhardlimit != 919 ||
+            oqctl.qc_dqblk.dqb_bsoftlimit != 818 ||
+            oqctl.qc_dqblk.dqb_ihardlimit != 616 ||
+            oqctl.qc_dqblk.dqb_isoftlimit != 515 ||
+            oqctl.qc_dqblk.dqb_curspace != 717 ||
+            oqctl.qc_dqblk.dqb_curinodes != 414 ||
+            oqctl.qc_dqblk.dqb_btime != 0 ||
+            oqctl.qc_dqblk.dqb_itime != 0) {
+                CERROR("3l: quotactl Q_GETQUOTA get wrong result: "
+                       "%llu, %llu, %llu, %llu, %llu, %llu, %llu, %llu\n",
+                       oqctl.qc_dqblk.dqb_bhardlimit,
+                       oqctl.qc_dqblk.dqb_bsoftlimit,
+                       oqctl.qc_dqblk.dqb_ihardlimit,
+                       oqctl.qc_dqblk.dqb_isoftlimit,
+                       oqctl.qc_dqblk.dqb_curspace,
+                       oqctl.qc_dqblk.dqb_curinodes,
+                       oqctl.qc_dqblk.dqb_btime,
+                       oqctl.qc_dqblk.dqb_itime);
+                RETURN(-EINVAL);
+        }
+
+        RETURN(0);
+}
+
+/*
+ * Test quotaoff: turn user and group quota off.
+ * Returns 0 on success or the fsfilt_quotactl() error.
+ */
+static int quotactl_test_4(struct obd_device *obd, struct super_block *sb)
+{
+        /* zero the request so no stack garbage reaches the filesystem */
+        struct obd_quotactl oqctl = { 0, };
+        int rc;
+        ENTRY;
+
+        oqctl.qc_cmd = Q_QUOTAOFF;
+        oqctl.qc_id = 500;
+        oqctl.qc_type = UGQUOTA;
+        rc = fsfilt_quotactl(obd, sb, &oqctl);
+        if (rc) {
+                CERROR("4a: quotactl Q_QUOTAOFF failed: %d\n", rc);
+                RETURN(rc);
+        }
+
+        RETURN(0);
+}
+
+/* -------------------------------------------------------------------------
+ * Tests above, boring obd functions below
+ * ------------------------------------------------------------------------- */
+static int quotactl_run_tests(struct obd_device *obd, struct obd_device *tgt)
+{
+        struct super_block *sb;
+        struct obd_run_ctxt saved;
+        int rc;
+        ENTRY;
+
+        if (!strcmp(tgt->obd_type->typ_name, LUSTRE_MDS_NAME))
+                sb = tgt->u.mds.mds_sb;
+        else if (!strcmp(tgt->obd_type->typ_name, "obdfilter"))
+                sb = tgt->u.filter.fo_sb;
+        else {
+                CERROR("TARGET OBD should be mds or obdfilter\n");
+                RETURN(-EINVAL);
+        }
+
+        push_ctxt(&saved, &tgt->obd_ctxt, NULL);
+
+        rc = quotactl_test_1(tgt, sb);
+        if (rc)
+                GOTO(cleanup, rc);
+
+#if 0
+        rc = quotactl_test_2(tgt, sb);
+        if (rc)
+                GOTO(cleanup, rc);
+#endif
+
+        rc = quotactl_test_3(tgt, sb);
+        if (rc)
+                GOTO(cleanup, rc);
+
+ cleanup:
+        quotactl_test_4(tgt, sb);
+
+        pop_ctxt(&saved, &tgt->obd_ctxt, NULL);
+
+        return rc;
+}
+
+/* OBD cleanup method: drop the lprocfs state of @obd; always returns 0. */
+static int quotactl_test_cleanup(struct obd_device *obd)
+{
+        lprocfs_obd_cleanup(obd);
+        return 0;
+}
+
+/*
+ * OBD setup method: look up the target device named in config buffer 1 of
+ * @buf, register the lprocfs entries of @obd, run the quotactl tests
+ * against the target and drop the lprocfs entries again.
+ *
+ * Returns the test result, or -EINVAL when the target name is missing or
+ * the target is not attached and set up.
+ */
+static int quotactl_test_setup(struct obd_device *obd, obd_count len, void *buf)
+{
+        struct lprocfs_static_vars lvars;
+        struct lustre_cfg *lcfg = buf;
+        struct obd_device *tgt;
+        int rc;
+        ENTRY;
+
+        /* the target name is read from buffer index 1, so at least two
+         * buffers must be present */
+        if (lcfg->lcfg_bufcount < 2) {
+                CERROR("requires a mds OBD name\n");
+                RETURN(-EINVAL);
+        }
+
+        tgt = class_name2obd(lustre_cfg_string(lcfg, 1));
+        if (!tgt || !tgt->obd_attached || !tgt->obd_set_up) {
+                CERROR("target device not attached or not set up (%s)\n",
+                       lustre_cfg_string(lcfg, 1));
+                RETURN(-EINVAL);
+        }
+
+        lprocfs_init_vars(quotactl_test, &lvars);
+        lprocfs_obd_setup(obd, lvars.obd_vars);
+
+        rc = quotactl_run_tests(obd, tgt);
+
+        /* the lprocfs entries are dropped whether or not the tests passed */
+        quotactl_test_cleanup(obd);
+
+        RETURN(rc);
+}
+
+static struct obd_ops quotactl_obd_ops = {
+        .o_owner       = THIS_MODULE,
+        .o_setup       = quotactl_test_setup,
+        .o_cleanup     = quotactl_test_cleanup,
+};
+
+static struct lprocfs_vars lprocfs_obd_vars[] = { {0} };
+static struct lprocfs_vars lprocfs_module_vars[] = { {0} };
+LPROCFS_INIT_VARS(quotactl_test, lprocfs_module_vars, lprocfs_obd_vars)
+
+/* Module init: register the "quotactl_test" OBD type with its ops and
+ * module-level lprocfs variables. */
+static int __init quotactl_test_init(void)
+{
+        struct lprocfs_static_vars lvars;
+
+        lprocfs_init_vars(quotactl_test, &lvars);
+        return class_register_type(&quotactl_obd_ops, lvars.module_vars,
+                                   "quotactl_test");
+}
+
+/* Module exit: unregister the "quotactl_test" OBD type. */
+static void __exit quotactl_test_exit(void)
+{
+        class_unregister_type("quotactl_test");
+}
+
+MODULE_AUTHOR("Cluster File Systems, Inc. <info@clusterfs.com>");
+MODULE_DESCRIPTION("quotactl test module");
+MODULE_LICENSE("GPL");
+
+module_init(quotactl_test_init);
+module_exit(quotactl_test_exit);
index d6e7b50..4702f5d 100644 (file)
@@ -221,7 +221,7 @@ static void mdc_realloc_openmsg(struct ptlrpc_request *req,
         OBD_ALLOC(new_msg, new_size);
         if (new_msg != NULL) {
                 struct lustre_msg *old_msg = req->rq_reqmsg;
-                long irqflags;
+                unsigned long irqflags;
 
                 DEBUG_REQ(D_INFO, req, "replace reqmsg for larger EA %u\n",
                           body->eadatasize);
index 4c21f5a..7ba104f 100644 (file)
@@ -611,6 +611,97 @@ int mdc_readpage(struct obd_export *exp, struct ll_fid *mdc_fid, __u64 offset,
         return rc;
 }
 
+/*
+ * Send an MDS_QUOTACHECK request carrying a copy of @oqctl and wait for the
+ * (empty) reply.
+ *
+ * cl_qchk_stat is set to CL_QUOTACHECKING before the request is queued; if
+ * the RPC fails, the error code is stored there instead.  On success the
+ * field is left at CL_QUOTACHECKING by this function.
+ *
+ * Returns 0 on success, -ENOMEM if the request could not be allocated, or
+ * the error returned by ptlrpc_queue_wait().
+ */
+static int mdc_quotacheck(struct obd_export *exp, struct obd_quotactl *oqctl)
+{
+        struct client_obd *cli = &exp->exp_obd->u.cli;
+        struct ptlrpc_request *req;
+        struct obd_quotactl *body;
+        int size = sizeof(*body);
+        int rc;
+        ENTRY;
+
+        req = ptlrpc_prep_req(class_exp2cliimp(exp), MDS_QUOTACHECK, 1, &size,
+                              NULL);
+        /* NOTE(review): this path reaches ptlrpc_req_finished(NULL) below;
+         * presumably that call tolerates NULL - confirm */
+        if (!req)
+                GOTO(out, rc = -ENOMEM);
+
+        body = lustre_msg_buf(req->rq_reqmsg, 0, sizeof(*body));
+        memcpy(body, oqctl, sizeof(*body));
+
+        /* reply carries no buffers */
+        req->rq_replen = lustre_msg_size(0, NULL);
+
+        spin_lock(&cli->cl_qchk_lock);
+        cli->cl_qchk_stat = CL_QUOTACHECKING;
+        spin_unlock(&cli->cl_qchk_lock);
+        rc = ptlrpc_queue_wait(req);
+        if (rc) {
+                /* record the failure so mdc_poll_quotacheck() can report it */
+                spin_lock(&cli->cl_qchk_lock);
+                cli->cl_qchk_stat = rc;
+                spin_unlock(&cli->cl_qchk_lock);
+        }
+out:
+        ptlrpc_req_finished(req);
+        RETURN (rc);
+}
+
+/*
+ * Report the quotacheck status recorded in the client obd to the caller.
+ *
+ * While a check is still running (CL_QUOTACHECKING), qchk->stat is set to
+ * -ENODATA and 0 is returned.  A zero status is passed through unchanged.
+ * Any other status is returned (values above CL_QUOTACHECKING are mapped to
+ * -EINTR) and the MDS type name and target uuid are filled into @qchk.
+ */
+static int mdc_poll_quotacheck(struct obd_export *exp,
+                               struct if_quotacheck *qchk)
+{
+        struct client_obd *cli = &exp->exp_obd->u.cli;
+        int rc;
+        ENTRY;
+
+        /* take a snapshot of the status under the lock */
+        spin_lock(&cli->cl_qchk_lock);
+        rc = cli->cl_qchk_stat;
+        spin_unlock(&cli->cl_qchk_lock);
+
+        qchk->stat = rc;
+
+        /* still in progress */
+        if (rc == CL_QUOTACHECKING) {
+                qchk->stat = -ENODATA;
+                RETURN(0);
+        }
+
+        /* nothing to report */
+        if (rc == 0)
+                RETURN(0);
+
+        if (qchk->stat > CL_QUOTACHECKING) {
+                rc = -EINTR;
+                qchk->stat = rc;
+        }
+
+        /* tell the caller which target the status belongs to */
+        strncpy(qchk->obd_type, LUSTRE_MDS_NAME, 10);
+        qchk->obd_uuid = cli->cl_import->imp_target_uuid;
+
+        RETURN(rc);
+}
+
+/*
+ * Send an MDS_QUOTACTL request carrying a copy of @oqctl, wait for the reply
+ * and copy the returned obd_quotactl back over @oqctl.
+ *
+ * Returns 0 on success, -ENOMEM if the request could not be allocated,
+ * -EPROTO if the reply buffer cannot be unpacked, or the error returned by
+ * ptlrpc_queue_wait().  @oqctl is only overwritten on success.
+ */
+static int mdc_quotactl(struct obd_export *exp, struct obd_quotactl *oqctl)
+{
+        struct ptlrpc_request *req;
+        struct obd_quotactl *oqc;
+        int size = sizeof(*oqctl);
+        int rc;
+        ENTRY;
+
+        req = ptlrpc_prep_req(class_exp2cliimp(exp), MDS_QUOTACTL, 1, &size,
+                              NULL);
+        if (!req)
+                GOTO(out, rc = -ENOMEM);
+
+        memcpy(lustre_msg_buf(req->rq_reqmsg, 0, sizeof (*oqctl)), oqctl, size);
+
+        /* the reply carries one obd_quotactl-sized buffer */
+        req->rq_replen = lustre_msg_size(1, &size);
+
+        rc = ptlrpc_queue_wait(req);
+        if (!rc) {
+                oqc = lustre_swab_repbuf(req, 0, sizeof (*oqc),
+                                         lustre_swab_obd_quotactl);
+                if (oqc == NULL) {
+                        /* the buffer being unpacked is an obd_quotactl,
+                         * not an mds_body */
+                        CERROR ("Can't unpack obd_quotactl\n");
+                        GOTO(out, rc = -EPROTO);
+                }
+
+                memcpy(oqctl, oqc, sizeof(*oqctl));
+        }
+out:
+        ptlrpc_req_finished(req);
+        RETURN (rc);
+}
+
 static int mdc_iocontrol(unsigned int cmd, struct obd_export *exp, int len,
                          void *karg, void *uarg)
 {
@@ -652,6 +743,9 @@ static int mdc_iocontrol(unsigned int cmd, struct obd_export *exp, int len,
                 GOTO(out, rc);
         }
 #endif
+        case OBD_IOC_POLL_QUOTACHECK:
+                rc = mdc_poll_quotacheck(exp, (struct if_quotacheck *)karg);
+                GOTO(out, rc);
         default:
                 CERROR("mdc_ioctl(): unrecognised ioctl %#x\n", cmd);
                 GOTO(out, rc = -ENOTTY);
@@ -1045,6 +1139,8 @@ struct obd_ops mdc_obd_ops = {
         .o_import_event = mdc_import_event,
         .o_llog_init    = mdc_llog_init,
         .o_llog_finish  = mdc_llog_finish,
+        .o_quotacheck   = mdc_quotacheck,
+        .o_quotactl     = mdc_quotactl,
 };
 
 int __init mdc_init(void)
index 9063c80..b3b3648 100644 (file)
@@ -2,4 +2,8 @@ MODULES := mds
 mds-objs := mds_log.o mds_unlink_open.o mds_lov.o handler.o mds_reint.o
 mds-objs += mds_fs.o lproc_mds.o mds_open.o mds_lib.o
 
+# The quota master objects are only linked in when PATCHLEVEL is 6
+# (presumably a 2.6 kernel build - confirm).
+ifeq ($(PATCHLEVEL),6)
+mds-objs += quota_context.o quota_master.o
+endif
+
 @INCLUDE_RULES@
index b05a4b5..91277b5 100644 (file)
@@ -8,4 +8,5 @@ modulefs_DATA = mds$(KMODEXT)
 endif
 
 MOSTLYCLEANFILES = *.o *.ko *.mod.c
-DIST_SOURCES = $(mds-objs:%.o=%.c) mds_internal.h
+# The quota sources are listed explicitly here rather than derived from
+# mds-objs.
+DIST_SOURCES := $(mds-objs:%.o=%.c) mds_internal.h 
+DIST_SOURCES += quota_context.c quota_master.c
index 50853b9..f722340 100644 (file)
 #include <linux/lustre_fsfilt.h>
 #include <linux/lprocfs_status.h>
 #include <linux/lustre_commit_confd.h>
+#include <linux/lustre_quota.h>
 
 #include "mds_internal.h"
 
+/* Argument block handed from mds_quotacheck() to mds_quotacheck_thread().
+ * There is a single file-static instance. */
+static struct quotacheck_info qchkinfo;
+
 static int mds_intent_policy(struct ldlm_namespace *ns,
                              struct ldlm_lock **lockp, void *req_cookie,
                              ldlm_mode_t mode, int flags, void *data);
@@ -1011,6 +1014,205 @@ out:
         RETURN(0);
 }
 
+/*
+ * Send an OBD_QC_CALLBACK request carrying a copy of @oqctl over the
+ * export's reverse import and wait for the (empty) reply.
+ *
+ * Returns -ENOMEM if the request cannot be allocated, otherwise the result
+ * of ptlrpc_queue_wait().
+ */
+static int mds_quotacheck_callback(struct obd_export *exp,
+                                   struct obd_quotactl *oqctl)
+{
+        struct ptlrpc_request *req;
+        struct obd_quotactl *body;
+        int rc, size = sizeof(*oqctl);
+        /* pairs with the RETURN()s below, as in the rest of this file */
+        ENTRY;
+
+        req = ptlrpc_prep_req(exp->exp_imp_reverse, OBD_QC_CALLBACK,
+                              1, &size, NULL);
+        if (!req)
+                RETURN(-ENOMEM);
+
+        body = lustre_msg_buf(req->rq_reqmsg, 0, sizeof(*body));
+        memcpy(body, oqctl, sizeof(*oqctl));
+
+        /* reply carries no buffers */
+        req->rq_replen = lustre_msg_size(0, NULL);
+
+        rc = ptlrpc_queue_wait(req);
+        ptlrpc_req_finished(req);
+
+        RETURN(rc);
+}
+
+
+/*
+ * Body of the kernel thread started by mds_quotacheck().
+ *
+ * @data points at the quotacheck_info filled in by the starter.  The thread
+ * blocks all signals, names itself "quotacheck", signals qi_starting, runs
+ * fsfilt_quotacheck() on the MDS superblock and then sends the quotacheck
+ * callback over the export.  A failure of fsfilt_quotacheck() is only
+ * logged; the thread's return value is the result of the callback.
+ */
+static int mds_quotacheck_thread(void *data)
+{
+        struct quotacheck_info *info = data;
+        struct obd_run_ctxt ctxt_saved;
+        struct obd_quotactl *qctl;
+        struct obd_export *export;
+        struct obd_device *dev;
+        unsigned long sigflags;
+        int rc;
+
+        lock_kernel();
+        ptlrpc_daemonize();
+
+        /* block every signal for this thread */
+        SIGNAL_MASK_LOCK(current, sigflags);
+        sigfillset(&current->blocked);
+        RECALC_SIGPENDING;
+        SIGNAL_MASK_UNLOCK(current, sigflags);
+
+        THREAD_NAME(current->comm, sizeof(current->comm) - 1, "%s", "quotacheck");
+        unlock_kernel();
+
+        /* let the starter, which waits on qi_starting, carry on */
+        complete(&info->qi_starting);
+
+        export = info->qi_exp;
+        qctl = &info->qi_oqctl;
+        dev = export->exp_obd;
+
+        push_ctxt(&ctxt_saved, &dev->obd_ctxt, NULL);
+        rc = fsfilt_quotacheck(dev, dev->u.mds.mds_sb, qctl);
+        if (rc != 0)
+                CERROR("%s: fsfilt_quotacheck: %d\n", dev->obd_name, rc);
+        pop_ctxt(&ctxt_saved, &dev->obd_ctxt, NULL);
+
+        rc = mds_quotacheck_callback(export, qctl);
+
+        /* give back the count taken by atomic_dec_and_test() in
+         * mds_quotacheck() */
+        atomic_inc(&dev->u.mds.mds_quotachecking);
+
+        return rc;
+}
+
+/*
+ * Handler for MDS_QUOTACHECK requests.
+ *
+ * Unpacks the obd_quotactl from the request and packs an empty reply.  In
+ * this version the handler then unconditionally sets the reply status to
+ * -EOPNOTSUPP ("quotaoff"); everything between that GOTO and the "out"
+ * label is currently unreachable.
+ *
+ * The disabled part would: claim the mds_quotachecking counter (-EBUSY if a
+ * check is already running), fill the static qchkinfo, initialise the admin
+ * quota files and start mds_quotacheck_thread(), waiting until the thread
+ * has signalled qi_starting.
+ *
+ * Returns -EPROTO if the request cannot be unpacked, the error from
+ * lustre_pack_reply(), or 0 with the operation result in req->rq_status.
+ */
+static int mds_quotacheck(struct ptlrpc_request *req)
+{
+        struct obd_device *obd = req->rq_export->exp_obd;
+        struct mds_obd *mds = &obd->u.mds;
+        struct obd_quotactl *oqctl;
+        int rc = 0;
+        ENTRY;
+
+        oqctl = lustre_swab_reqbuf(req, 0, sizeof(*oqctl),
+                                   lustre_swab_obd_quotactl);
+        if (oqctl == NULL)
+                RETURN(-EPROTO);
+
+        rc = lustre_pack_reply(req, 0, NULL, NULL);
+        if (rc) {
+                CERROR("mds: out of memory while packing quotacheck reply\n");
+                RETURN(rc);
+        }
+
+        /* XXX: quotaoff */
+        GOTO(out, rc = -EOPNOTSUPP);
+
+        /* mds_quotachecking is set to 1 in mds_setup(); reaching 0 here
+         * means no other quotacheck is in flight */
+        if (!atomic_dec_and_test(&mds->mds_quotachecking)) {
+                atomic_inc(&mds->mds_quotachecking);
+                GOTO(out, rc = -EBUSY);
+        }
+
+        init_completion(&qchkinfo.qi_starting);
+        qchkinfo.qi_exp = req->rq_export;
+        memcpy(&qchkinfo.qi_oqctl, oqctl, sizeof(*oqctl));
+
+        rc = init_admin_quotafiles(obd, &qchkinfo.qi_oqctl);
+        if (rc) {
+                CERROR("init_admin_quotafiles failed: %d\n", rc);
+                atomic_inc(&mds->mds_quotachecking);
+                GOTO(out, rc);
+        }
+                
+        rc = kernel_thread(mds_quotacheck_thread, &qchkinfo, CLONE_VM|CLONE_FILES);
+        if (rc < 0) {
+                CERROR("%s: error starting mds_quotacheck_thread: %d\n",
+                       obd->obd_name, rc);
+                atomic_inc(&mds->mds_quotachecking);
+        } else {
+                CDEBUG(D_INFO, "%s: mds_quotacheck_thread: %d\n",
+                       obd->obd_name, rc);
+                /* on success the thread itself re-increments the counter
+                 * when it finishes */
+                wait_for_completion(&qchkinfo.qi_starting);
+                rc = 0;
+        }
+out:
+        req->rq_status = rc;
+        RETURN(0);
+}
+
+/*
+ * Handler for MDS_QUOTACTL requests.
+ *
+ * Unpacks the obd_quotactl from the request and packs a reply with room for
+ * one obd_quotactl.  In this version the handler then unconditionally sets
+ * the reply status to -EOPNOTSUPP ("quotaoff"); everything between that
+ * GOTO and the "out" label is currently unreachable.
+ *
+ * The disabled part would copy the request into the reply buffer, dispatch
+ * on qc_cmd to the admin-quota helpers and, for some commands, also issue
+ * the command against the local filesystem through fsfilt_quotactl().
+ *
+ * Returns -EPROTO if the request cannot be unpacked, the error from
+ * lustre_pack_reply(), or 0 with the operation result in req->rq_status.
+ */
+static int mds_quotactl(struct ptlrpc_request *req)
+{
+        struct obd_device *obd = req->rq_export->exp_obd;
+        struct obd_quotactl *oqctl, *repoqc;
+        struct obd_run_ctxt saved;
+        int rc = 0, size = sizeof(*repoqc);
+        ENTRY;
+
+        oqctl = lustre_swab_reqbuf(req, 0, sizeof(*oqctl),
+                                   lustre_swab_obd_quotactl);
+        if (oqctl == NULL)
+                RETURN(-EPROTO);
+
+        rc = lustre_pack_reply(req, 1, &size, NULL);
+        if (rc)
+                RETURN(rc);
+
+        /* XXX: quotaoff */
+        GOTO(out, rc = -EOPNOTSUPP);
+
+        repoqc = lustre_msg_buf(req->rq_repmsg, 0, sizeof(*repoqc));
+        memcpy(repoqc, oqctl, sizeof(*repoqc));
+
+        switch (repoqc->qc_cmd) {
+        case Q_QUOTAON:
+                rc = mds_quota_on(obd, repoqc);
+                break;
+        case Q_QUOTAOFF:
+                /* return value of mds_quota_off() is not checked */
+                mds_quota_off(obd, repoqc);
+                break;
+        case Q_SETINFO:
+                rc = mds_set_dqinfo(obd, repoqc);
+                break;
+        case Q_GETINFO:
+                rc = mds_get_dqinfo(obd, repoqc);
+                break;
+        case Q_SETQUOTA:
+                rc = mds_set_dqblk(obd, repoqc);
+                break;
+        case Q_GETQUOTA:
+                rc = mds_get_dqblk(obd, repoqc);
+                break;
+        case Q_GETOINFO:
+        case Q_GETOQUOTA:
+                /* no admin-quota step; handled by the local call below */
+                break;
+        default:
+                CERROR("%s: unsupported mds_quotactl command: %d\n",
+                       obd->obd_name, repoqc->qc_cmd);
+                LBUG();
+        }
+
+        if (rc) {
+                CDEBUG(D_INFO, "mds_quotactl admin op failed: rc = %d\n", rc);
+                GOTO(out, rc);
+        }
+
+        if (repoqc->qc_cmd == Q_QUOTAON || repoqc->qc_cmd == Q_QUOTAOFF ||
+            Q_GETOCMD(repoqc) || repoqc->qc_cmd == Q_GETQUOTA) {
+                struct obd_quotactl *loqc = repoqc;
+
+                /* for Q_GETQUOTA the local query uses the request buffer so
+                 * the reply filled in by mds_get_dqblk() is not overwritten */
+                if (repoqc->qc_cmd == Q_GETQUOTA)
+                        loqc = oqctl;
+
+                push_ctxt(&saved, &obd->obd_ctxt, NULL);
+                rc = fsfilt_quotactl(obd, obd->u.mds.mds_sb, loqc);
+                pop_ctxt(&saved, &obd->obd_ctxt, NULL);
+
+                /* add the local usage to the figures already in the reply */
+                if (!rc && loqc->qc_cmd == Q_GETQUOTA) {
+                        repoqc->qc_dqblk.dqb_curinodes +=
+                                                loqc->qc_dqblk.dqb_curinodes;
+                        repoqc->qc_dqblk.dqb_curspace +=
+                                                loqc->qc_dqblk.dqb_curspace;
+                }
+        }
+out:
+        req->rq_status = rc;
+        RETURN(0);
+}
+
 int mds_reint(struct ptlrpc_request *req, int offset,
               struct lustre_handle *lockh)
 {
@@ -1299,6 +1501,18 @@ int mds_handle(struct ptlrpc_request *req)
                 rc = mds_set_info(req->rq_export, req);
                 break;
 
+        case MDS_QUOTACHECK:
+                DEBUG_REQ(D_INODE, req, "quotacheck");
+                OBD_FAIL_RETURN(OBD_FAIL_MDS_QUOTACHECK_NET, 0);
+                rc = mds_quotacheck(req);
+                break;
+
+        case MDS_QUOTACTL:
+                DEBUG_REQ(D_INODE, req, "quotactl");
+                OBD_FAIL_RETURN(OBD_FAIL_MDS_QUOTACTL_NET, 0);
+                rc = mds_quotactl(req);
+                break;
+
         case OBD_PING:
                 DEBUG_REQ(D_INODE, req, "ping");
                 rc = target_handle_ping(req);
@@ -1469,13 +1683,16 @@ static int mds_setup(struct obd_device *obd, obd_count len, void *buf)
         CDEBUG(D_SUPER, "%s: mnt = %p\n", lustre_cfg_string(lcfg, 1), mnt);
 
         LASSERT(!ll_check_rdonly(ll_sbdev(mnt->mnt_sb)));
-        
+
+        sema_init(&mds->mds_quota_info.qi_sem, 1);
         sema_init(&mds->mds_orphan_recovery_sem, 1);
         sema_init(&mds->mds_epoch_sem, 1);
         spin_lock_init(&mds->mds_transno_lock);
         mds->mds_max_mdsize = sizeof(struct lov_mds_md);
         mds->mds_max_cookiesize = sizeof(struct llog_cookie);
 
+        atomic_set(&mds->mds_quotachecking, 1);
+
         sprintf(ns_name, "mds-%s", obd->obd_uuid.uuid);
         obd->obd_namespace = ldlm_namespace_new(ns_name, LDLM_NAMESPACE_SERVER);
         if (obd->obd_namespace == NULL) {
@@ -1542,6 +1759,14 @@ static int mds_setup(struct obd_device *obd, obd_count len, void *buf)
                               obd->obd_replayable ? "enabled" : "disabled");
         }
 
+        sema_init(&mds->mds_quota_info.qi_sem, 1);
+        rc = qctxt_init(&mds->mds_quota_ctxt, mds->mds_sb, dqacq_handler);
+        if (rc) {
+                CERROR("initialize quota context failed! (rc:%d)\n", rc);
+                qctxt_cleanup(&mds->mds_quota_ctxt, 0);
+                GOTO(err_fs, rc);
+        }
+
         RETURN(0);
 
 err_fs:
@@ -1624,7 +1849,7 @@ int mds_postrecov(struct obd_device *obd)
         int rc, item = 0;
 
         LASSERT(!obd->obd_recovering);
-        LASSERT(llog_get_context(obd, LLOG_UNLINK_ORIG_CTXT) != NULL);
+        LASSERT(llog_get_context(obd, LLOG_MDS_OST_ORIG_CTXT) != NULL);
 
         /* set nextid first, so we are sure it happens */
         rc = mds_lov_set_nextid(obd);
@@ -1647,7 +1872,7 @@ int mds_postrecov(struct obd_device *obd)
         if (rc)
                 GOTO(out, rc);
 
-        rc = llog_connect(llog_get_context(obd, LLOG_UNLINK_ORIG_CTXT),
+        rc = llog_connect(llog_get_context(obd, LLOG_MDS_OST_ORIG_CTXT),
                           obd->u.mds.mds_lov_desc.ld_tgt_count,
                           NULL, NULL, NULL);
         if (rc) {
@@ -1733,6 +1958,8 @@ static int mds_cleanup(struct obd_device *obd)
 
         lprocfs_obd_cleanup(obd);
 
+        qctxt_cleanup(&mds->mds_quota_ctxt, 0);
+
         mds_update_server_data(obd, 1);
         if (mds->mds_lov_objids != NULL) {
                 OBD_FREE(mds->mds_lov_objids,
@@ -2116,8 +2343,13 @@ static struct obd_ops mdt_obd_ops = {
 
 static int __init mds_init(void)
 {
+        int rc;
         struct lprocfs_static_vars lvars;
 
+        rc = lustre_dquot_init();
+        if (rc)
+                return rc;
+        
         lprocfs_init_vars(mds, &lvars);
         class_register_type(&mds_obd_ops, lvars.module_vars, LUSTRE_MDS_NAME);
         lprocfs_init_vars(mdt, &lvars);
@@ -2128,6 +2360,8 @@ static int __init mds_init(void)
 
 static void /*__exit*/ mds_exit(void)
 {
+        lustre_dquot_exit();
+
         class_unregister_type(LUSTRE_MDS_NAME);
         class_unregister_type(LUSTRE_MDT_NAME);
 }
index 543d296..74795b4 100644 (file)
@@ -49,6 +49,121 @@ static int lprocfs_mds_rd_mntdev(char *page, char **start, off_t off, int count,
         return snprintf(page, count, "%s\n",obd->u.mds.mds_vfsmnt->mnt_devname);
 }
 
+/* /proc read handler for "quota_bunit_sz": prints lqc_bunit_sz of the MDS
+ * quota context (@data is the obd_device) followed by a newline and returns
+ * the snprintf() result.  @start, @off and @eof are not used. */
+static int lprocfs_mds_rd_bunit(char *page, char **start, off_t off, int count, 
+                                int *eof, void *data)
+{
+        struct obd_device *obd = (struct obd_device *)data;
+        LASSERT(obd != NULL);
+
+        return snprintf(page, count, "%lu\n", 
+                        obd->u.mds.mds_quota_ctxt.lqc_bunit_sz);
+}
+
+/* /proc read handler for "quota_iunit_sz": prints lqc_iunit_sz of the MDS
+ * quota context (@data is the obd_device) followed by a newline and returns
+ * the snprintf() result.  @start, @off and @eof are not used. */
+static int lprocfs_mds_rd_iunit(char *page, char **start, off_t off, int count, 
+                                int *eof, void *data)
+{
+        struct obd_device *obd = (struct obd_device *)data;
+        LASSERT(obd != NULL);
+
+        return snprintf(page, count, "%lu\n", 
+                        obd->u.mds.mds_quota_ctxt.lqc_iunit_sz);
+}
+
+/*
+ * /proc write handler for "quota_bunit_sz".
+ *
+ * Parses an integer from @buffer and stores it as lqc_bunit_sz of the MDS
+ * quota context (@data is the obd_device).  The value must be positive, a
+ * multiple of QUOTABLOCK_SIZE and larger than the current lqc_btune_sz.
+ *
+ * Returns @count on success, the error from lprocfs_write_helper(), or
+ * -EINVAL for a rejected value.
+ */
+static int lprocfs_mds_wr_bunit(struct file *file, const char *buffer,
+                                unsigned long count, void *data)
+{
+        struct obd_device *obd = (struct obd_device *)data;
+        int val, rc = 0;
+        LASSERT(obd != NULL);
+
+        rc = lprocfs_write_helper(buffer, count, &val);
+        if (rc)
+                return rc;
+
+        /* Reject non-positive input first: the sizes are printed with %lu
+         * by the read handlers (presumably unsigned long), so a negative
+         * int would be converted to a huge value by the comparison and the
+         * assignment below. */
+        if (val <= 0 || val % QUOTABLOCK_SIZE ||
+            val <= obd->u.mds.mds_quota_ctxt.lqc_btune_sz)
+                return -EINVAL;
+
+        obd->u.mds.mds_quota_ctxt.lqc_bunit_sz = val;
+        return count;
+}
+
+/*
+ * /proc write handler for "quota_iunit_sz".
+ *
+ * Parses an integer from @buffer and stores it as lqc_iunit_sz of the MDS
+ * quota context (@data is the obd_device).  The value must be positive and
+ * larger than the current lqc_itune_sz.
+ *
+ * Returns @count on success, the error from lprocfs_write_helper(), or
+ * -EINVAL for a rejected value.
+ */
+static int lprocfs_mds_wr_iunit(struct file *file, const char *buffer,
+                                unsigned long count, void *data)
+{
+        struct obd_device *obd = (struct obd_device *)data;
+        int val, rc = 0;
+        LASSERT(obd != NULL);
+
+        rc = lprocfs_write_helper(buffer, count, &val);
+        if (rc)
+                return rc;
+
+        /* Reject non-positive input first: the sizes are printed with %lu
+         * by the read handlers (presumably unsigned long), so a negative
+         * int would be converted to a huge value by the comparison and the
+         * assignment below. */
+        if (val <= 0 || val <= obd->u.mds.mds_quota_ctxt.lqc_itune_sz)
+                return -EINVAL;
+
+        obd->u.mds.mds_quota_ctxt.lqc_iunit_sz = val;
+        return count;
+}
+
+/* /proc read handler for "quota_btune_sz": prints lqc_btune_sz of the MDS
+ * quota context (@data is the obd_device) followed by a newline and returns
+ * the snprintf() result.  @start, @off and @eof are not used. */
+static int lprocfs_mds_rd_btune(char *page, char **start, off_t off, int count, 
+                                int *eof, void *data)
+{
+        struct obd_device *obd = (struct obd_device *)data;
+        LASSERT(obd != NULL);
+
+        return snprintf(page, count, "%lu\n", 
+                        obd->u.mds.mds_quota_ctxt.lqc_btune_sz);
+}
+
+/* /proc read handler for "quota_itune_sz": prints lqc_itune_sz of the MDS
+ * quota context (@data is the obd_device) followed by a newline and returns
+ * the snprintf() result.  @start, @off and @eof are not used. */
+static int lprocfs_mds_rd_itune(char *page, char **start, off_t off, int count, 
+                                int *eof, void *data)
+{
+        struct obd_device *obd = (struct obd_device *)data;
+        LASSERT(obd != NULL);
+
+        return snprintf(page, count, "%lu\n", 
+                        obd->u.mds.mds_quota_ctxt.lqc_itune_sz);
+}
+
+/*
+ * /proc write handler for "quota_btune_sz".
+ *
+ * Parses an integer from @buffer and stores it as lqc_btune_sz of the MDS
+ * quota context (@data is the obd_device).  The value must be greater than
+ * QUOTABLOCK_SIZE * MIN_QLIMIT, a multiple of QUOTABLOCK_SIZE and smaller
+ * than the current lqc_bunit_sz.
+ *
+ * Returns @count on success, the error from lprocfs_write_helper(), or
+ * -EINVAL for a rejected value.
+ */
+static int lprocfs_mds_wr_btune(struct file *file, const char *buffer,
+                                unsigned long count, void *data)
+{
+        struct obd_device *obd = (struct obd_device *)data;
+        int val, rc = 0;
+        LASSERT(obd != NULL);
+
+        rc = lprocfs_write_helper(buffer, count, &val);
+        if (rc)
+                return rc;
+        
+        /* NOTE(review): val is a signed int while lqc_bunit_sz is printed
+         * with %lu elsewhere; presumably a negative val is rejected by the
+         * upper-bound test after conversion - confirm */
+        if (val <= QUOTABLOCK_SIZE * MIN_QLIMIT || val % QUOTABLOCK_SIZE || 
+            val >= obd->u.mds.mds_quota_ctxt.lqc_bunit_sz)
+                return -EINVAL;
+
+        obd->u.mds.mds_quota_ctxt.lqc_btune_sz = val;
+        return count;
+}
+
+/*
+ * /proc write handler for "quota_itune_sz".
+ *
+ * Parses an integer from @buffer and stores it as lqc_itune_sz of the MDS
+ * quota context (@data is the obd_device).  The value must be greater than
+ * MIN_QLIMIT and smaller than the current lqc_iunit_sz.
+ *
+ * Returns @count on success, the error from lprocfs_write_helper(), or
+ * -EINVAL for a rejected value.
+ */
+static int lprocfs_mds_wr_itune(struct file *file, const char *buffer,
+                                unsigned long count, void *data)
+{
+        struct obd_device *obd = (struct obd_device *)data;
+        int val, rc = 0;
+        LASSERT(obd != NULL);
+
+        rc = lprocfs_write_helper(buffer, count, &val);
+        if (rc)
+                return rc;
+        
+        /* NOTE(review): val is a signed int while lqc_iunit_sz is printed
+         * with %lu elsewhere; presumably a negative val is rejected by the
+         * upper-bound test after conversion - confirm */
+        if (val <= MIN_QLIMIT || 
+            val >= obd->u.mds.mds_quota_ctxt.lqc_iunit_sz)
+                return -EINVAL;
+
+        obd->u.mds.mds_quota_ctxt.lqc_itune_sz = val;
+        return count;
+}
+
 struct lprocfs_vars lprocfs_mds_obd_vars[] = {
         { "uuid",         lprocfs_rd_uuid,        0, 0 },
         { "blocksize",    lprocfs_rd_blksize,     0, 0 },
@@ -62,6 +177,10 @@ struct lprocfs_vars lprocfs_mds_obd_vars[] = {
         { "recovery_status", lprocfs_obd_rd_recovery_status, 0, 0 },
         { "evict_client", 0, lprocfs_wr_evict_client, 0 },
         { "num_exports",  lprocfs_rd_num_exports, 0, 0 },
+        { "quota_bunit_sz", lprocfs_mds_rd_bunit, lprocfs_mds_wr_bunit, 0 },
+        { "quota_btune_sz", lprocfs_mds_rd_btune, lprocfs_mds_wr_btune, 0 },
+        { "quota_iunit_sz", lprocfs_mds_rd_iunit, lprocfs_mds_wr_iunit, 0 },
+        { "quota_itune_sz", lprocfs_mds_rd_itune, lprocfs_mds_wr_itune, 0 },
         { 0 }
 };
 
index ffbf43f..a71a5c2 100644 (file)
@@ -31,6 +31,8 @@
 #include <linux/module.h>
 #include <linux/kmod.h>
 #include <linux/version.h>
+#include <linux/sched.h>
+#include <linux/quotaops.h>
 #if (LINUX_VERSION_CODE >= KERNEL_VERSION(2,5,0))
 #include <linux/mount.h>
 #endif
@@ -510,7 +512,7 @@ int mds_fs_cleanup(struct obd_device *obd)
 {
         struct mds_obd *mds = &obd->u.mds;
         struct obd_run_ctxt saved;
-        int rc = 0;
+        int i, rc = 0;
 
         if (obd->obd_fail)
                 CERROR("%s: shutting down for failover; client state will"
@@ -544,9 +546,21 @@ int mds_fs_cleanup(struct obd_device *obd)
                 l_dput(mds->mds_pending_dir);
                 mds->mds_pending_dir = NULL;
         }
+        
+        /* close admin quota files */
+        down(&mds->mds_quota_info.qi_sem);
+        for (i = 0; i < MAXQUOTAS; i++) {
+               if (mds->mds_quota_info.qi_files[i]) {
+                        filp_close(mds->mds_quota_info.qi_files[i], 0);
+                        mds->mds_quota_info.qi_files[i] = NULL;
+                }
+       }
+        up(&mds->mds_quota_info.qi_sem);
+
         pop_ctxt(&saved, &obd->obd_ctxt, NULL);
         shrink_dcache_parent(mds->mds_fid_de);
         dput(mds->mds_fid_de);
+        DQUOT_OFF(mds->mds_sb);
 
         return rc;
 }
@@ -565,13 +579,18 @@ int mds_obd_create(struct obd_export *exp, struct obdo *oa,
         struct obd_run_ctxt saved;
         char fidname[LL_FID_NAMELEN];
         void *handle;
+        struct obd_ucred ucred;
         int rc = 0, err, namelen;
         ENTRY;
 
-        push_ctxt(&saved, &exp->exp_obd->obd_ctxt, NULL);
+        /* the owner of object file should always be root */
+        memset(&ucred, 0, sizeof(ucred));
+        ucred.ouc_cap = current->cap_effective | CAP_SYS_RESOURCE;
+        
+        push_ctxt(&saved, &exp->exp_obd->obd_ctxt, &ucred);
 
         sprintf(fidname, "OBJECTS/%u.%u", tmpname, current->pid);
-        filp = filp_open(fidname, O_CREAT | O_EXCL, 0644);
+        filp = filp_open(fidname, O_CREAT | O_EXCL, 0666);
         if (IS_ERR(filp)) {
                 rc = PTR_ERR(filp);
                 if (rc == -EEXIST) {
@@ -632,7 +651,7 @@ out_close:
                         rc = err;
         }
 out_pop:
-        pop_ctxt(&saved, &exp->exp_obd->obd_ctxt, NULL);
+        pop_ctxt(&saved, &exp->exp_obd->obd_ctxt, &ucred);
         RETURN(rc);
 }
 
@@ -643,13 +662,16 @@ int mds_obd_destroy(struct obd_export *exp, struct obdo *oa,
         struct inode *parent_inode = mds->mds_objects_dir->d_inode;
         struct obd_device *obd = exp->exp_obd;
         struct obd_run_ctxt saved;
+        struct obd_ucred ucred;
         char fidname[LL_FID_NAMELEN];
         struct dentry *de;
         void *handle;
         int err, namelen, rc = 0;
         ENTRY;
-
-        push_ctxt(&saved, &obd->obd_ctxt, NULL);
+        
+        memset(&ucred, 0, sizeof(ucred));
+        ucred.ouc_cap = current->cap_effective | CAP_SYS_RESOURCE;
+        push_ctxt(&saved, &obd->obd_ctxt, &ucred);
 
         namelen = ll_fid2str(fidname, oa->o_id, oa->o_generation);
 
@@ -688,6 +710,7 @@ out_dput:
         if (de != NULL)
                 l_dput(de);
         up(&parent_inode->i_sem);
-        pop_ctxt(&saved, &obd->obd_ctxt, NULL);
+
+        pop_ctxt(&saved, &obd->obd_ctxt, &ucred);
         RETURN(rc);
 }
index 3ca6feb..792d6d0 100644 (file)
@@ -118,6 +118,9 @@ int mds_get_parent_child_locked(struct obd_device *obd, struct mds_obd *mds,
                                 struct dentry **dchildp, int child_mode);
 int mds_lock_new_child(struct obd_device *obd, struct inode *inode,
                        struct lustre_handle *child_lockh);
+int mds_osc_setattr_async(struct obd_device *obd, struct inode *inode,
+                          struct lov_mds_md *lmm, int lmm_size,
+                          struct llog_cookie *logcookies);
 
 /* mds/mds_lib.c */
 int mds_update_unpack(struct ptlrpc_request *, int offset,
@@ -131,6 +134,9 @@ int mds_cleanup_orphans(struct obd_device *obd);
 int mds_log_op_unlink(struct obd_device *obd, struct inode *inode,
                       struct lov_mds_md *lmm, int lmm_size,
                       struct llog_cookie *logcookies, int cookies_size);
+int mds_log_op_setattr(struct obd_device *obd, struct inode *inode,
+                      struct lov_mds_md *lmm, int lmm_size,
+                      struct llog_cookie *logcookies, int cookies_size);
 int mds_llog_init(struct obd_device *obd, struct obd_device *tgt, int count,
                   struct llog_catid *logid);
 int mds_llog_finish(struct obd_device *obd, int count);
@@ -186,4 +192,42 @@ void mds_pack_inode2fid(struct ll_fid *fid, struct inode *inode);
 void mds_pack_inode2body(struct mds_body *body, struct inode *inode);
 #endif
 
+/* mds/quota_master.c */
+/* Real implementations are declared for kernels >= 2.5.0 only; older
+ * kernels get the inline no-op stubs in the #else branch, all of which
+ * return 0 (or nothing). */
+#if (LINUX_VERSION_CODE >= KERNEL_VERSION(2,5,0))
+int lustre_dquot_init(void);
+void lustre_dquot_exit(void);
+int dqacq_handler(struct obd_device *obd, struct qunit_data *qdata, int opc);
+void mds_adjust_qunit(struct obd_device *obd, uid_t cuid, gid_t cgid, 
+                     uid_t puid, gid_t pgid, int rc);
+int init_admin_quotafiles(struct obd_device *obd, struct obd_quotactl *oqctl);
+int mds_quota_on(struct obd_device *obd, struct obd_quotactl *oqctl);
+int mds_quota_off(struct obd_device *obd, struct obd_quotactl *oqctl);
+int mds_set_dqinfo(struct obd_device *obd, struct obd_quotactl *oqctl);
+int mds_get_dqinfo(struct obd_device *obd, struct obd_quotactl *oqctl);
+int mds_set_dqblk(struct obd_device *obd, struct obd_quotactl *oqctl);
+int mds_get_dqblk(struct obd_device *obd, struct obd_quotactl *oqctl);
+#else
+/* stubs: quota master is not available on this kernel */
+static inline int lustre_dquot_init(void) { return 0; }
+static inline void lustre_dquot_exit(void) { return; }
+static inline int dqacq_handler(struct obd_device *obd, 
+                                struct qunit_data *qdata, int opc) {return 0;}
+static inline void mds_adjust_qunit(struct obd_device *obd, uid_t cuid, 
+                                    gid_t cgid, uid_t puid, 
+                                    gid_t pgid, int rc) { return; }
+static inline int init_admin_quotafiles(struct obd_device *obd, 
+                                        struct obd_quotactl *oqctl) {return 0;}
+static inline int mds_quota_on(struct obd_device *obd, 
+                               struct obd_quotactl *oqctl) { return 0; }
+static inline int mds_quota_off(struct obd_device *obd, 
+                                struct obd_quotactl *oqctl) { return 0; }
+static inline int mds_set_dqinfo(struct obd_device *obd, 
+                                 struct obd_quotactl *oqctl) { return 0; }
+static inline int mds_get_dqinfo(struct obd_device *obd, 
+                                 struct obd_quotactl *oqctl) { return 0; }
+static inline int mds_set_dqblk(struct obd_device *obd, 
+                                struct obd_quotactl *oqctl) { return 0; }
+static inline int mds_get_dqblk(struct obd_device *obd, 
+                                struct obd_quotactl *oqctl) { return 0; }
+#endif /* KERNEL_VERSION(2,5,0) */
+
 #endif /* _MDS_INTERNAL_H */
index 9fa802a..9da5623 100644 (file)
 #include <linux/obd_class.h>
 #include <linux/lustre_fsfilt.h>
 #include <linux/lustre_commit_confd.h>
+#include <linux/lustre_log.h>
 
 #include "mds_internal.h"
 
+/* callback function of lov to fill unlink log record:
+ * copies the object id and generation from the llog_fill_rec_data in @data
+ * into the unlink record @rec.  Always returns 0. */
+static int mds_log_fill_unlink_rec(struct llog_rec_hdr *rec, void *data)
+{
+        struct llog_fill_rec_data *lfd = (struct llog_fill_rec_data *)data;
+        struct llog_unlink_rec *lur = (struct llog_unlink_rec *)rec;
+        /* pairs with the RETURN() below, as in the rest of this file */
+        ENTRY;
+
+        lur->lur_oid = lfd->lfd_id;
+        lur->lur_ogen = lfd->lfd_ogen;
+
+        RETURN(0);
+}
+
+/* callback function of lov to fill setattr log record:
+ * copies the object id and generation from the llog_fill_rec_data in @data
+ * into the setattr record @rec.  Always returns 0. */
+static int mds_log_fill_setattr_rec(struct llog_rec_hdr *rec, void *data)
+{
+        struct llog_fill_rec_data *lfd = (struct llog_fill_rec_data *)data;
+        struct llog_setattr_rec *lsr = (struct llog_setattr_rec *)rec;
+        /* pairs with the RETURN() below, as in the rest of this file */
+        ENTRY;
+
+        lsr->lsr_oid = lfd->lfd_id;
+        lsr->lsr_ogen = lfd->lfd_ogen;
+
+        RETURN(0);
+}
+
 static int mds_llog_origin_add(struct llog_ctxt *ctxt,
                         struct llog_rec_hdr *rec, struct lov_stripe_md *lsm,
-                        struct llog_cookie *logcookies, int numcookies)
+                        struct llog_cookie *logcookies, int numcookies,
+                        llog_fill_rec_cb_t fill_cb)
 {
         struct obd_device *obd = ctxt->loc_obd;
         struct obd_device *lov_obd = obd->u.mds.mds_osc_obd;
@@ -48,7 +74,7 @@ static int mds_llog_origin_add(struct llog_ctxt *ctxt,
         ENTRY;
 
         lctxt = llog_get_context(lov_obd, ctxt->loc_idx);
-        rc = llog_add(lctxt, rec, lsm, logcookies, numcookies);
+        rc = llog_add(lctxt, rec, lsm, logcookies, numcookies, fill_cb);
         RETURN(rc);
 }
 
@@ -89,6 +115,7 @@ int mds_log_op_unlink(struct obd_device *obd, struct inode *inode,
         struct mds_obd *mds = &obd->u.mds;
         struct lov_stripe_md *lsm = NULL;
         struct llog_ctxt *ctxt;
+        struct llog_unlink_rec *lur;
         int rc;
         ENTRY;
 
@@ -100,16 +127,66 @@ int mds_log_op_unlink(struct obd_device *obd, struct inode *inode,
         if (rc < 0)
                 RETURN(rc);
 
-        ctxt = llog_get_context(obd, LLOG_UNLINK_ORIG_CTXT);
-        rc = llog_add(ctxt, NULL, lsm, logcookies,
-                      cookies_size / sizeof(struct llog_cookie));
+        /* first prepare unlink log record */
+        OBD_ALLOC(lur, sizeof(*lur));
+        if (!lur)
+                RETURN(-ENOMEM);
+        lur->lur_hdr.lrh_len = lur->lur_tail.lrt_len = sizeof(*lur);
+        lur->lur_hdr.lrh_type = MDS_UNLINK_REC;
+
+        ctxt = llog_get_context(obd, LLOG_MDS_OST_ORIG_CTXT);
+        rc = llog_add(ctxt, &lur->lur_hdr, lsm, logcookies,
+                      cookies_size / sizeof(struct llog_cookie),
+                      mds_log_fill_unlink_rec);
+
+        obd_free_memmd(mds->mds_osc_exp, &lsm);
+        OBD_FREE(lur, sizeof(*lur));
+
+        RETURN(rc);
+}
+
+/*
+ * Add a setattr llog record for @inode to the LLOG_MDS_OST_ORIG_CTXT log.
+ *
+ * @lmm/@lmm_size are unpacked into a stripe md, a setattr record carrying
+ * the inode's uid and gid is prepared, and llog_add() is called with
+ * mds_log_fill_setattr_rec() as the fill callback.  @logcookies has room
+ * for @cookies_size bytes of cookies.
+ *
+ * Returns the llog_add() result, the PTR_ERR of mds_osc_obd if that is an
+ * error pointer, a negative obd_unpackmd() error, or -ENOMEM.
+ */
+int mds_log_op_setattr(struct obd_device *obd, struct inode *inode,
+                      struct lov_mds_md *lmm, int lmm_size,
+                      struct llog_cookie *logcookies, int cookies_size)
+{
+        struct mds_obd *mds = &obd->u.mds;
+        struct lov_stripe_md *lsm = NULL;
+        struct llog_ctxt *ctxt;
+        struct llog_setattr_rec *lsr;
+        int rc;
+        ENTRY;
+
+        if (IS_ERR(mds->mds_osc_obd))
+                RETURN(PTR_ERR(mds->mds_osc_obd));
+
+        rc = obd_unpackmd(mds->mds_osc_exp, &lsm,
+                          lmm, lmm_size);
+        if (rc < 0)
+                RETURN(rc);
+
+        OBD_ALLOC(lsr, sizeof(*lsr));
+        if (!lsr) {
+                /* don't leak the stripe md unpacked above */
+                obd_free_memmd(mds->mds_osc_exp, &lsm);
+                RETURN(-ENOMEM);
+        }
+
+        /* prepare setattr log record */
+        lsr->lsr_hdr.lrh_len = lsr->lsr_tail.lrt_len = sizeof(*lsr);
+        lsr->lsr_hdr.lrh_type = MDS_SETATTR_REC;
+        lsr->lsr_uid = inode->i_uid;
+        lsr->lsr_gid = inode->i_gid;
+
+        /* write setattr log */
+        ctxt = llog_get_context(obd, LLOG_MDS_OST_ORIG_CTXT);
+        rc = llog_add(ctxt, &lsr->lsr_hdr, lsm, logcookies,
+                      cookies_size / sizeof(struct llog_cookie),
+                      mds_log_fill_setattr_rec);
 
         obd_free_memmd(mds->mds_osc_exp, &lsm);
+        OBD_FREE(lsr, sizeof(*lsr));
 
         RETURN(rc);
 }
 
-static struct llog_operations mds_unlink_orig_logops = {
+static struct llog_operations mds_ost_orig_logops = {
         lop_add:        mds_llog_origin_add,
         lop_connect:    mds_llog_origin_connect,
 };
@@ -125,8 +202,8 @@ int mds_llog_init(struct obd_device *obd, struct obd_device *tgt,
         int rc;
         ENTRY;
 
-        rc = llog_setup(obd, LLOG_UNLINK_ORIG_CTXT, tgt, 0, NULL,
-                        &mds_unlink_orig_logops);
+        rc = llog_setup(obd, LLOG_MDS_OST_ORIG_CTXT, tgt, 0, NULL,
+                        &mds_ost_orig_logops);
         if (rc)
                 RETURN(rc);
 
@@ -148,7 +225,7 @@ int mds_llog_finish(struct obd_device *obd, int count)
         int rc = 0, rc2 = 0;
         ENTRY;
 
-        ctxt = llog_get_context(obd, LLOG_UNLINK_ORIG_CTXT);
+        ctxt = llog_get_context(obd, LLOG_MDS_OST_ORIG_CTXT);
         if (ctxt) 
                 rc = llog_cleanup(ctxt);
 
index 740ca84..90f24bc 100644 (file)
@@ -529,7 +529,7 @@ int mds_lov_synchronize(void *data)
         if (rc != 0)
                 RETURN(rc);
 
-        rc = llog_connect(llog_get_context(obd, LLOG_UNLINK_ORIG_CTXT),
+        rc = llog_connect(llog_get_context(obd, LLOG_MDS_OST_ORIG_CTXT),
                           obd->u.mds.mds_lov_desc.ld_tgt_count,
                           NULL, NULL, uuid);
         if (rc != 0) {
@@ -597,8 +597,8 @@ int mds_notify(struct obd_device *obd, struct obd_device *watched, int active)
                 CWARN("MDS %s: in recovery, not resetting orphans on %s\n",
                       obd->obd_name, uuid->uuid);
         } else {
-                LASSERT(llog_get_context(obd, LLOG_UNLINK_ORIG_CTXT) != NULL);
-
+                LASSERT(llog_get_context(obd, LLOG_MDS_OST_ORIG_CTXT) != NULL);
+                
                 rc = mds_lov_start_synchronize(obd, uuid);
         }
         RETURN(rc);
index cf9d014..b2b9d60 100644 (file)
@@ -295,8 +295,9 @@ cleanup_dentry:
 static int mds_create_objects(struct ptlrpc_request *req, int offset,
                               struct mds_update_record *rec,
                               struct mds_obd *mds, struct obd_device *obd,
-                              struct dentry *dchild, void **handle, 
-                              obd_id **ids)
+                              struct dentry *dchild, void **handle, obd_id **ids,
+                              struct llog_cookie **ret_logcookies, 
+                              int *setattr_async_flag)
 {
         struct obdo *oa;
         struct obd_trans_info oti = { 0 };
@@ -414,6 +415,7 @@ static int mds_create_objects(struct ptlrpc_request *req, int offset,
                         }
                         GOTO(out_oa, rc);
                 }
+                *setattr_async_flag = 1;
         } else {
                 rc = obd_iocontrol(OBD_IOC_LOV_SETEA, mds->mds_osc_exp,
                                    0, &lsm, rec->ur_eadata);
@@ -448,14 +450,35 @@ static int mds_create_objects(struct ptlrpc_request *req, int offset,
         lmm_size = rc;
         body->eadatasize = rc;
 
-        if (*handle == NULL)
-                *handle = fsfilt_start(obd, inode, FSFILT_OP_CREATE, NULL);
+        if (*handle == NULL) {
+                if (*setattr_async_flag)
+                        *handle = fsfilt_start_log(obd, inode, 
+                                                   FSFILT_OP_CREATE, NULL, 
+                                                   le32_to_cpu(lmm->lmm_stripe_count));
+                else
+                        *handle = fsfilt_start(obd, inode, FSFILT_OP_CREATE, NULL);
+        }
         if (IS_ERR(*handle)) {
                 rc = PTR_ERR(*handle);
                 *handle = NULL;
                 GOTO(out_oa, rc);
         }
 
+        /* write mds setattr log for created objects */
+        if (*setattr_async_flag && lmm_size) {
+                struct llog_cookie *logcookies = NULL;
+
+                OBD_ALLOC(logcookies, mds->mds_max_cookiesize);
+                if (logcookies == NULL)
+                        GOTO(out_oa, rc = -ENOMEM);
+                *ret_logcookies = logcookies;
+                if (mds_log_op_setattr(obd, inode, lmm, lmm_size, logcookies,
+                                       mds->mds_max_cookiesize) <= 0) {
+                        OBD_FREE(logcookies, mds->mds_max_cookiesize);
+                        *ret_logcookies = NULL;
+               }
+        }  
+
         rc = fsfilt_set_md(obd, inode, *handle, lmm, lmm_size);
         lmm_buf = lustre_msg_buf(req->rq_repmsg, offset, 0);
         lmm_bufsize = req->rq_repmsg->buflens[offset];
@@ -622,7 +645,9 @@ static int accmode(struct inode *inode, int flags)
 /* Handles object creation, actual opening, and I/O epoch */
 static int mds_finish_open(struct ptlrpc_request *req, struct dentry *dchild,
                            struct mds_body *body, int flags, void **handle,
-                           struct mds_update_record *rec,struct ldlm_reply *rep)
+                           struct mds_update_record *rec,struct ldlm_reply *rep,
+                           struct llog_cookie **logcookies,
+                           int *setattr_async_flag)
 {
         struct mds_obd *mds = mds_req2mds(req);
         struct obd_device *obd = req->rq_export->exp_obd;
@@ -652,7 +677,8 @@ static int mds_finish_open(struct ptlrpc_request *req, struct dentry *dchild,
                 if (!(body->valid & OBD_MD_FLEASIZE)) {
                         /* no EA: create objects */
                         rc = mds_create_objects(req, 2, rec, mds, obd,
-                                                dchild, handle, &ids);
+                                                dchild, handle, &ids,
+                                                logcookies, setattr_async_flag);
                         if (rc) {
                                 CERROR("mds_create_objects: rc = %d\n", rc);
                                 up(&dchild->d_inode->i_sem);
@@ -688,11 +714,16 @@ static int mds_open_by_fid(struct ptlrpc_request *req, struct ll_fid *fid,
                            struct mds_body *body, int flags,
                            struct mds_update_record *rec,struct ldlm_reply *rep)
 {
+        struct obd_device *obd = req->rq_export->exp_obd;
         struct mds_obd *mds = mds_req2mds(req);
         struct dentry *dchild;
         char fidname[LL_FID_NAMELEN];
         int fidlen = 0, rc;
         void *handle = NULL;
+        struct llog_cookie *logcookies = NULL;
+        struct lov_mds_md *lmm = NULL;
+        int lmm_size = 0;
+        int setattr_async_flag = 0;
         ENTRY;
 
         fidlen = ll_fid2str(fidname, fid->id, fid->generation);
@@ -727,9 +758,17 @@ static int mds_open_by_fid(struct ptlrpc_request *req, struct ll_fid *fid,
         intent_set_disposition(rep, DISP_LOOKUP_POS);
 
  open:
-        rc = mds_finish_open(req, dchild, body, flags, &handle, rec, rep);
+        rc = mds_finish_open(req, dchild, body, flags, &handle, rec, rep,
+                             &logcookies, &setattr_async_flag);
         rc = mds_finish_transno(mds, dchild ? dchild->d_inode : NULL, handle,
                                 req, rc, rep ? rep->lock_policy_res1 : 0);
+        /* do mds to ost setattr for new created objects */
+        if (rc == 0 && setattr_async_flag) {
+                lmm = lustre_msg_buf(req->rq_repmsg, 2, 0);
+                lmm_size = req->rq_repmsg->buflens[2];
+                rc = mds_osc_setattr_async(obd, dchild->d_inode, lmm, lmm_size,
+                                           logcookies);
+        }
         /* XXX what do we do here if mds_finish_transno itself failed? */
 
         l_dput(dchild);
@@ -800,6 +839,12 @@ int mds_open(struct mds_update_record *rec, int offset,
         int parent_mode = LCK_PR;
         void *handle = NULL;
         struct dentry_params dp;
+        struct lov_mds_md *lmm = NULL;
+        int lmm_size = 0;
+        struct llog_cookie *logcookies = NULL;
+        int setattr_async_flag = 0;
+        uid_t parent_uid = 0;
+        gid_t parent_gid = 0;
         ENTRY;
 
         if (offset == 2) { /* intent */
@@ -1028,12 +1073,20 @@ int mds_open(struct mds_update_record *rec, int offset,
 
         /* Step 5: mds_open it */
         rc = mds_finish_open(req, dchild, body, rec->ur_flags, &handle, rec,
-                             rep);
+                             rep, &logcookies, &setattr_async_flag);
         GOTO(cleanup, rc);
 
  cleanup:
         rc = mds_finish_transno(mds, dchild ? dchild->d_inode : NULL, handle,
                                 req, rc, rep ? rep->lock_policy_res1 : 0);
+        /* do mds to ost setattr for new created objects */
+        if (rc == 0 && setattr_async_flag) {
+                lmm = lustre_msg_buf(req->rq_repmsg, 2, 0);
+                lmm_size = req->rq_repmsg->buflens[2];
+                mds_osc_setattr_async(obd, dchild->d_inode, lmm, lmm_size,
+                                      logcookies);
+        }
+
  cleanup_no_trans:
         switch (cleanup_phase) {
         case 2:
@@ -1046,6 +1099,10 @@ int mds_open(struct mds_update_record *rec, int offset,
                         }
                 } else if (created) {
                         mds_lock_new_child(obd, dchild->d_inode, NULL);
+                        /* save uid/gid for quota acquire/release */
+                        parent_uid = dparent->d_inode->i_uid;
+                        parent_gid = dparent->d_inode->i_gid;
+
                 }
                 l_dput(dchild);
         case 1:
@@ -1058,6 +1115,10 @@ int mds_open(struct mds_update_record *rec, int offset,
                 else
                         ptlrpc_save_lock (req, &parent_lockh, parent_mode);
         }
+        
+        /* trigger dqacq on the owner of child and parent */
+        mds_adjust_qunit(obd, current->fsuid, current->fsgid, 
+                         parent_uid, parent_gid, rc);
         RETURN(rc);
 }
 
index bba0365..db4cd58 100644 (file)
@@ -363,6 +363,64 @@ static void reconstruct_reint_setattr(struct mds_update_record *rec,
         l_dput(de);
 }
 
+/* Send the inode's uid/gid to its OST objects with an asynchronous setattr.
+ *
+ * @obd:        MDS obd device; the request is issued through its mds_osc_exp
+ * @inode:      inode whose i_uid/i_gid are copied into the obdo
+ * @lmm:        packed striping EA, unpacked here by obd_unpackmd(); must not
+ *              be NULL
+ * @lmm_size:   size of @lmm in bytes
+ * @logcookies: optional llog cookies passed on through oti_logcookies.  This
+ *              function frees them (mds_max_cookiesize bytes) on every exit
+ *              path, so the caller must not use or free them afterwards.
+ *
+ * Returns the result of obd_setattr_async(), a negative errno from an earlier
+ * failure, or 0 when the OBD_FAIL_MDS_OST_SETATTR fail check fires.
+ */
+int mds_osc_setattr_async(struct obd_device *obd, struct inode *inode,
+                          struct lov_mds_md *lmm, int lmm_size,
+                          struct llog_cookie *logcookies)
+{
+        struct mds_obd *mds = &obd->u.mds;
+        struct lov_stripe_md *lsm = NULL;
+        struct obd_trans_info oti = { 0 };
+        struct obdo *oa = NULL;
+        int  cleanup_phase = 0, rc = 0;
+        ENTRY;
+
+        if (OBD_FAIL_CHECK(OBD_FAIL_MDS_OST_SETATTR))
+                GOTO(cleanup, rc);
+
+        /* first get memory EA */
+        oa = obdo_alloc();
+        /* leave through cleanup (phase 0) so that logcookies are released;
+         * a bare return here would leak them */
+        if (!oa)
+                GOTO(cleanup, rc = -ENOMEM);
+
+        LASSERT(lmm);
+
+        cleanup_phase = 1;
+        rc = obd_unpackmd(mds->mds_osc_exp, &lsm, lmm, lmm_size);
+        if (rc < 0) {
+                CERROR("Error unpack md %p\n", lmm);
+                GOTO(cleanup, rc);
+        }
+
+        cleanup_phase = 2;
+        /* then fill oa */
+        oa->o_id = lsm->lsm_object_id;
+        oa->o_uid = inode->i_uid;
+        oa->o_gid = inode->i_gid;
+        oa->o_valid = OBD_MD_FLID | OBD_MD_FLUID | OBD_MD_FLGID;
+        if (logcookies) {
+                oa->o_valid |= OBD_MD_FLCOOKIE;
+                oti.oti_logcookies = logcookies;
+        }
+
+        /* do setattr from mds to ost asynchronously */
+        rc = obd_setattr_async(mds->mds_osc_exp, oa, lsm, &oti);
+        if (rc)
+                CDEBUG(D_INODE, "mds to ost setattr objid 0x"LPX64" on ost error "
+                       "%d\n", lsm->lsm_object_id, rc);
+cleanup:
+        /* each phase falls through to the ones below it */
+        switch(cleanup_phase) {
+        case 2:
+                obd_free_memmd(mds->mds_osc_exp, &lsm);
+        case 1:
+                obdo_free(oa);
+        case 0:
+                if (logcookies)
+                        OBD_FREE(logcookies, mds->mds_max_cookiesize);
+        }
+
+        RETURN(rc);
+}
+
 /* In the raw-setattr case, we lock the child inode.
  * In the write-back case or if being called from open, the client holds a lock
  * already.
@@ -380,7 +438,12 @@ static int mds_reint_setattr(struct mds_update_record *rec, int offset,
         struct lustre_handle lockh;
         void *handle = NULL;
         struct mds_logcancel_data *mlcd = NULL;
+        struct lov_mds_md *lmm = NULL;
+        struct llog_cookie *logcookies = NULL;
+        int lmm_size = 0, need_lock = 1;
         int rc = 0, cleanup_phase = 0, err, locked = 0;
+        uid_t child_uid = 0;
+        gid_t child_gid = 0;
         ENTRY;
 
         LASSERT(offset == 0);
@@ -408,13 +471,37 @@ static int mds_reint_setattr(struct mds_update_record *rec, int offset,
         cleanup_phase = 1;
         inode = de->d_inode;
         LASSERT(inode);
+
+        /* save uid/gid for quota acq/rel */
+        child_uid = inode->i_uid;
+        child_gid = inode->i_gid;
+
         if ((S_ISREG(inode->i_mode) || S_ISDIR(inode->i_mode)) &&
-            rec->ur_eadata != NULL)
+            rec->ur_eadata != NULL) {
                 down(&inode->i_sem);
+                need_lock = 0;
+        }
 
         OBD_FAIL_WRITE(OBD_FAIL_MDS_REINT_SETATTR_WRITE, inode->i_sb);
 
-        handle = fsfilt_start(obd, inode, FSFILT_OP_SETATTR, NULL);
+        /* start a log journal handle if needed */
+        if (S_ISREG(inode->i_mode) &&
+            rec->ur_iattr.ia_valid & (ATTR_UID | ATTR_GID)) {
+                lmm_size = mds->mds_max_mdsize;
+                OBD_ALLOC(lmm, lmm_size);
+                if (lmm == NULL)
+                        GOTO(cleanup, rc = -ENOMEM);
+                
+                cleanup_phase = 2;
+                rc = mds_get_md(obd, inode, lmm, &lmm_size, need_lock);
+                if (rc < 0) 
+                        GOTO(cleanup, rc);
+
+                handle = fsfilt_start_log(obd, inode, FSFILT_OP_SETATTR, NULL,
+                                          le32_to_cpu(lmm->lmm_stripe_count));
+        } else {
+                handle = fsfilt_start(obd, inode, FSFILT_OP_SETATTR, NULL);
+        }
         if (IS_ERR(handle))
                 GOTO(cleanup, rc = PTR_ERR(handle));
 
@@ -429,8 +516,22 @@ static int mds_reint_setattr(struct mds_update_record *rec, int offset,
         if (rec->ur_iattr.ia_valid & ATTR_ATTR_FLAG)    /* ioctl */
                 rc = fsfilt_iocontrol(obd, inode, NULL, EXT3_IOC_SETFLAGS,
                                       (long)&rec->ur_iattr.ia_attr_flags);
-        else                                            /* setattr */
+        else                                            /* setattr */
                 rc = fsfilt_setattr(obd, de, handle, &rec->ur_iattr, 0);
+                /* journal chown/chgrp in llog, just like unlink */
+                if (rc == 0 && S_ISREG(inode->i_mode) &&
+                    rec->ur_iattr.ia_valid & (ATTR_UID | ATTR_GID) && lmm_size){
+                        OBD_ALLOC(logcookies, mds->mds_max_cookiesize);
+                        if (logcookies == NULL)
+                                GOTO(cleanup, rc = -ENOMEM);
+                        if (mds_log_op_setattr(obd, inode, lmm, lmm_size,
+                                               logcookies,
+                                               mds->mds_max_cookiesize) <= 0) {
+                                OBD_FREE(logcookies, mds->mds_max_cookiesize);
+                                logcookies = NULL;
+                        }
+                }
+        }
 
         if (rc == 0 && (S_ISREG(inode->i_mode) || S_ISDIR(inode->i_mode)) &&
             rec->ur_eadata != NULL) {
@@ -501,7 +602,13 @@ static int mds_reint_setattr(struct mds_update_record *rec, int offset,
                 fsfilt_add_journal_cb(req->rq_export->exp_obd, 0, handle,
                                       mds_cancel_cookies_cb, mlcd);
         err = mds_finish_transno(mds, inode, handle, req, rc, 0);
+        /* do mds to ost setattr if needed */
+        if (!rc && !err && lmm_size) 
+                mds_osc_setattr_async(obd, inode, lmm, lmm_size, logcookies);
+
         switch (cleanup_phase) {
+        case 2:
+                OBD_FREE(lmm, mds->mds_max_mdsize);
         case 1:
                 if ((S_ISREG(inode->i_mode) || S_ISDIR(inode->i_mode)) &&
                     rec->ur_eadata != NULL)
@@ -523,6 +630,13 @@ static int mds_reint_setattr(struct mds_update_record *rec, int offset,
                 rc = err;
 
         req->rq_status = rc;
+
+        /* trigger dqrel/dqacq for original owner and new owner */
+        if (rec->ur_iattr.ia_valid & (ATTR_UID | ATTR_GID)) {
+                mds_adjust_qunit(obd, rec->ur_iattr.ia_uid, 
+                                 rec->ur_iattr.ia_gid, 0, 0, rc);
+                mds_adjust_qunit(obd, child_uid, child_gid, 0, 0, rc);
+        }
         return 0;
 }
 
@@ -563,6 +677,8 @@ static int mds_reint_create(struct mds_update_record *rec, int offset,
         struct lustre_handle lockh;
         int rc = 0, err, type = rec->ur_mode & S_IFMT, cleanup_phase = 0;
         int created = 0;
+        uid_t parent_uid = 0;
+        gid_t parent_gid = 0;
         struct dentry_params dp;
         ENTRY;
 
@@ -766,9 +882,14 @@ cleanup:
                                 CERROR("unlink in error path: %d\n", err);
                         break;
                 }
+        } else if (created) {
+                /* save uid/gid of create inode and parent */
+                parent_uid = dir->i_uid;
+                parent_gid = dir->i_gid;
         } else {
                 rc = err;
         }
+
         switch (cleanup_phase) {
         case 2: /* child dentry */
                 l_dput(dchild);
@@ -786,6 +907,10 @@ cleanup:
                 LBUG();
         }
         req->rq_status = rc;
+        
+        /* trigger dqacq on the owner of child and parent */
+        mds_adjust_qunit(obd, current->fsuid, current->fsgid, 
+                         parent_uid, parent_gid, rc);
         return 0;
 }
 
@@ -1241,6 +1366,8 @@ static int mds_reint_unlink(struct mds_update_record *rec, int offset,
         struct lustre_handle parent_lockh, child_lockh, child_reuse_lockh;
         void *handle = NULL;
         int rc = 0, cleanup_phase = 0;
+        uid_t child_uid = 0, parent_uid = 0;
+        gid_t child_gid = 0, parent_gid = 0;
         ENTRY;
 
         LASSERT(offset == 0 || offset == 2);
@@ -1270,6 +1397,12 @@ static int mds_reint_unlink(struct mds_update_record *rec, int offset,
                 GOTO(cleanup, rc = -ENOENT);
         }
 
+        /* save uid/gid for quota acquire/release */
+        child_uid = child_inode->i_uid;
+        child_gid = child_inode->i_gid;
+        parent_uid = dparent->d_inode->i_uid;
+        parent_gid = dparent->d_inode->i_gid;
+        
         cleanup_phase = 2; /* dchild has a lock */
 
         /* We have to do these checks ourselves, in case we are making an
@@ -1440,6 +1573,9 @@ cleanup:
                 LBUG();
         }
         req->rq_status = rc;
+
+        /* trigger dqrel on the owner of child and parent */
+        mds_adjust_qunit(obd, child_uid, child_gid, parent_uid, parent_gid, rc);
         return 0;
 }
 
diff --git a/lustre/mds/quota_context.c b/lustre/mds/quota_context.c
new file mode 100644 (file)
index 0000000..907255a
--- /dev/null
@@ -0,0 +1,588 @@
+/* -*- mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*-
+ * vim:expandtab:shiftwidth=8:tabstop=8:
+ *
+ *  lustre/mds/quota_context.c
+ *  Lustre Quota Context
+ *
+ *  Copyright (c) 2001-2003 Cluster File Systems, Inc.
+ *   Author: Niu YaWei <niu@clusterfs.com>
+ *
+ *   This file is part of Lustre, http://www.lustre.org.
+ *
+ *   Lustre is free software; you can redistribute it and/or
+ *   modify it under the terms of version 2 of the GNU General Public
+ *   License as published by the Free Software Foundation.
+ *
+ *   Lustre is distributed in the hope that it will be useful,
+ *   but WITHOUT ANY WARRANTY; without even the implied warranty of
+ *   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ *   GNU General Public License for more details.
+ *
+ *   You should have received a copy of the GNU General Public License
+ *   along with Lustre; if not, write to the Free Software
+ *   Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
+ */
+#ifndef EXPORT_SYMTAB
+# define EXPORT_SYMTAB
+#endif
+
+#define DEBUG_SUBSYSTEM S_MDS
+
+#include <linux/version.h>
+#include <linux/fs.h>
+#include <asm/unistd.h>
+#include <linux/slab.h>
+#include <linux/quotaops.h>
+#include <linux/module.h>
+#include <linux/init.h>
+
+#include <linux/obd_class.h>
+#include <linux/lustre_quota.h>
+#include <linux/lustre_fsfilt.h>
+
+/* Default quota "unit" and "tune" sizes: the block values are byte counts,
+ * the inode values are inode counts.
+ * NOTE(review): presumably the initial values of lqc_bunit_sz, lqc_btune_sz,
+ * lqc_iunit_sz and lqc_itune_sz; the assignment is not visible here - confirm.
+ */
+const unsigned long default_bunit_sz = 100 * 1024 * 1024;       /* 100M bytes */
+const unsigned long default_btune_sz = 50 * 1024 * 1024;        /* 50M bytes */
+const unsigned long default_iunit_sz = 5000;    /* 5000 inodes */
+const unsigned long default_itune_sz = 2500;    /* 2500 inodes */
+
+/* Compute the qunit_hash bucket index for the (context, id, type) of @qdata. */
+static inline int const
+qunit_hashfn(struct lustre_quota_ctxt *qctxt, struct qunit_data *qdata)
+{
+        unsigned int quota_type = qdata->qd_type;
+        unsigned int owner_id = qdata->qd_id;
+        unsigned long hash;
+
+        /* fold the context address (shifted by L1_CACHE_SHIFT) with the id */
+        hash = (unsigned long)qctxt >> L1_CACHE_SHIFT;
+        hash ^= owner_id;
+
+        /* scale by the quota type, then reduce to the table size */
+        hash *= MAXQUOTAS - quota_type;
+        hash %= NR_DQHASH;
+        return hash;
+}
+
+/* Walk bucket @hashent of qunit_hash and return the qunit that belongs to
+ * @qctxt and has the same id, type and isblk as @qdata, or NULL if none. */
+static inline struct lustre_qunit *find_qunit(unsigned int hashent,
+                                              struct lustre_quota_ctxt *qctxt,
+                                              struct qunit_data *qdata)
+{
+        struct list_head *bucket = qunit_hash + hashent;
+        struct list_head *cur;
+
+        list_for_each(cur, bucket) {
+                struct lustre_qunit *cand;
+                struct qunit_data *key;
+
+                cand = list_entry(cur, struct lustre_qunit, lq_hash);
+                key = &cand->lq_data;
+
+                /* skip entries of other contexts or with a different key */
+                if (cand->lq_ctxt != qctxt)
+                        continue;
+                if (qdata->qd_id != key->qd_id ||
+                    qdata->qd_type != key->qd_type ||
+                    qdata->qd_isblk != key->qd_isblk)
+                        continue;
+
+                return cand;
+        }
+        return NULL;
+}
+
+/* check_cur_qunit - check the current usage of qunit.
+ * @obd:   obd device, passed through to fsfilt_quotactl()
+ * @qctxt: quota context; supplies the super block and the unit/tune sizes
+ * @qdata: the id/type/isblk of the quota unit to be checked.  qd_count must
+ *         be 0 on entry; when 1 or 2 is returned it holds the amount to
+ *         acquire or release, accumulated in steps of the unit size.
+ *
+ * return: 1 - need acquire qunit;
+ *        2 - need release qunit;
+ *        0 - need do nothing.
+ *      < 0 - error.
+ */
+static int
+check_cur_qunit(struct obd_device *obd,
+                struct lustre_quota_ctxt *qctxt, struct qunit_data *qdata)
+{
+        struct super_block *sb = qctxt->lqc_sb;
+        unsigned long qunit_sz, tune_sz;
+        __u64 usage, limit;
+        struct obd_quotactl *qctl = NULL;
+        int ret = 0;
+        ENTRY;
+
+        /* nothing to do when no quota is enabled on this super block */
+        if (!sb_any_quota_enabled(sb))
+                RETURN(0);
+
+        /* ignore root user */
+        if (qdata->qd_id == 0 && qdata->qd_type == USRQUOTA)
+                RETURN(0);
+
+        OBD_ALLOC(qctl, sizeof(*qctl));
+        if (qctl == NULL)
+                RETURN(-ENOMEM);
+
+        /* get fs quota usage & limit */
+        qctl->qc_cmd = Q_GETQUOTA;
+        qctl->qc_id = qdata->qd_id;
+        qctl->qc_type = qdata->qd_type;
+        ret = fsfilt_quotactl(obd, sb, qctl);
+        if (ret) {
+                if (ret == -ESRCH)      /* no limit */
+                        ret = 0;
+                else
+                        CERROR("can't get fs quota usage! (rc:%d)\n", ret);
+                GOTO(out, ret);
+        }
+
+        /* pick the block or the inode counters and the matching sizes */
+        if (qdata->qd_isblk) {
+                usage = qctl->qc_dqblk.dqb_curspace;
+                limit = qctl->qc_dqblk.dqb_bhardlimit;
+                qunit_sz = qctxt->lqc_bunit_sz;
+                tune_sz = qctxt->lqc_btune_sz;
+
+                LASSERT(!(qunit_sz % QUOTABLOCK_SIZE));
+                LASSERT(limit == MIN_QLIMIT
+                        || !((__u32) limit % toqb(qunit_sz)));
+                /* scale the block hard limit by QUOTABLOCK_BITS so that it
+                 * can be compared with dqb_curspace */
+                limit = limit << QUOTABLOCK_BITS;
+        } else {
+                usage = qctl->qc_dqblk.dqb_curinodes;
+                limit = qctl->qc_dqblk.dqb_ihardlimit;
+                qunit_sz = qctxt->lqc_iunit_sz;
+                tune_sz = qctxt->lqc_itune_sz;
+        }
+
+        /* if it's not first time to set quota, ignore the no quota limit
+         * case */
+        if (!limit)
+                GOTO(out, ret = 0);
+
+        /* we don't count the MIN_QLIMIT */
+        if ((limit == MIN_QLIMIT && !qdata->qd_isblk) ||
+            (toqb(limit) == MIN_QLIMIT && qdata->qd_isblk))
+                limit = 0;
+
+        LASSERT(qdata->qd_count == 0);
+        if (limit <= usage + tune_sz) {
+                /* acquire: add units until limit + count exceeds
+                 * usage + tune_sz */
+                while (qdata->qd_count + limit <= usage + tune_sz)
+                        qdata->qd_count += qunit_sz;
+                ret = 1;
+        } else if (limit > usage + qunit_sz + tune_sz) {
+                /* release: add units until limit - count no longer exceeds
+                 * usage + qunit_sz + tune_sz */
+                while (limit - qdata->qd_count > usage + qunit_sz + tune_sz)
+                        qdata->qd_count += qunit_sz;
+                ret = 2;
+        }
+        LASSERT(ret == 0 || qdata->qd_count);
+out:
+        OBD_FREE(qctl, sizeof(*qctl));
+        RETURN(ret);
+}
+
+/* Return the qunit already hashed for the id/type/isblk of @qdata in @qctxt,
+ * or NULL if there is none.  The callers visible in this file take
+ * qunit_hash_lock around the call. */
+static struct lustre_qunit *dqacq_in_flight(struct lustre_quota_ctxt *qctxt,
+                                            struct qunit_data *qdata)
+{
+        struct lustre_qunit *found;
+        ENTRY;
+
+        found = find_qunit(qunit_hashfn(qctxt, qdata), qctxt, qdata);
+        RETURN(found);
+}
+
+/* Allocate a qunit from qunit_cachep and initialise it for @qctxt with a copy
+ * of @qdata and the opcode @opc.  The new qunit has a reference count of 1 and
+ * is not hashed yet.  Returns NULL when the allocation fails. */
+static struct lustre_qunit *alloc_qunit(struct lustre_quota_ctxt *qctxt,
+                                        struct qunit_data *qdata, int opc)
+{
+        struct lustre_qunit *unit = NULL;
+        ENTRY;
+
+        OBD_SLAB_ALLOC(unit, qunit_cachep, SLAB_NOFS, sizeof(*unit));
+        if (unit != NULL) {
+                /* identity of the request */
+                unit->lq_ctxt = qctxt;
+                unit->lq_opc = opc;
+                memcpy(&unit->lq_data, qdata, sizeof(*qdata));
+
+                /* bookkeeping: one reference, empty hash link and waiters */
+                atomic_set(&unit->lq_refcnt, 1);
+                INIT_LIST_HEAD(&unit->lq_hash);
+                INIT_LIST_HEAD(&unit->lq_waiters);
+        }
+
+        RETURN(unit);
+}
+
+/* Give @qunit back to the qunit_cachep slab cache. */
+static inline void free_qunit(struct lustre_qunit *qunit)
+{
+        OBD_SLAB_FREE(qunit, qunit_cachep, sizeof(*qunit));
+}
+
+/* Take an additional reference on @qunit; dropped again with qunit_put(). */
+static inline void qunit_get(struct lustre_qunit *qunit)
+{
+        atomic_inc(&qunit->lq_refcnt);
+}
+
+/* Drop one reference on @qunit and free it once the count reaches zero. */
+static void qunit_put(struct lustre_qunit *qunit)
+{
+        LASSERT(atomic_read(&qunit->lq_refcnt));
+
+        /* somebody else still holds a reference */
+        if (!atomic_dec_and_test(&qunit->lq_refcnt))
+                return;
+
+        free_qunit(qunit);
+}
+
+/* Add @qunit to the qunit_hash bucket selected by @qctxt and its lq_data.
+ * No lock is taken here. */
+static void
+insert_qunit_nolock(struct lustre_quota_ctxt *qctxt, struct lustre_qunit *qunit)
+{
+        list_add(&qunit->lq_hash,
+                 qunit_hash + qunit_hashfn(qctxt, &qunit->lq_data));
+}
+
+/* Unlink @qunit from its hash list; it must currently be linked.  No lock is
+ * taken here. */
+static void remove_qunit_nolock(struct lustre_qunit *qunit)
+{
+        LASSERT(!list_empty(&qunit->lq_hash));
+        list_del_init(&qunit->lq_hash);
+}
+
+/* One caller waiting for an in-flight qunit request to complete. */
+struct qunit_waiter {
+        struct list_head qw_entry;      /* link on the qunit's lq_waiters list */
+        wait_queue_head_t qw_waitq;     /* woken by dqacq_completion() */
+        int qw_rc;                      /* request result, set before wake-up */
+};
+
+/* Log the id, type, count and isblk of the qunit_data @qd at D_QUOTA level,
+ * followed by a caller-supplied printf-style message.  The macro carries no
+ * trailing semicolon; the caller writes it. */
+#define QDATA_DEBUG(qd, fmt, arg...)                                    \
+        CDEBUG(D_QUOTA, "id(%u) type(%u) count(%u) isblk(%u):"          \
+               fmt, (qd)->qd_id, (qd)->qd_type, (qd)->qd_count,         \
+               (qd)->qd_isblk, ## arg)
+
+/* Grow the quota limit lvalue @limit by @count.  A limit that still equals
+ * MIN_QLIMIT is replaced by @count instead of being added to. */
+#define INC_QLIMIT(limit, count)                                        \
+        (((limit) == MIN_QLIMIT) ? ((limit) = (count)) : ((limit) += (count)))
+
+/* Finish a DQACQ/DQREL request.
+ *
+ * @obd:   obd device, passed through to fsfilt_quotactl()
+ * @qctxt: quota context the request belongs to
+ * @qdata: id/type/isblk/count of the request; qd_count must be a multiple of
+ *         the matching unit size
+ * @rc:    result of the request
+ * @opc:   QUOTA_DQACQ or QUOTA_DQREL; must match the hashed qunit's lq_opc
+ *
+ * When @rc is 0 the local hard limit is read with Q_GETQUOTA, raised (DQACQ)
+ * or lowered (DQREL) by the requested count and written back with Q_SETQUOTA.
+ * In every case the matching qunit is removed from the hash, all of its
+ * waiters are woken with qw_rc set to @rc, and one qunit reference is dropped.
+ *
+ * Returns the result of the local limit update (0 when it was skipped), not
+ * @rc.
+ */
+static int
+dqacq_completion(struct obd_device *obd,
+                 struct lustre_quota_ctxt *qctxt,
+                 struct qunit_data *qdata, int rc, int opc)
+{
+        struct lustre_qunit *qunit = NULL;
+        struct super_block *sb = qctxt->lqc_sb;
+        unsigned long qunit_sz;
+        struct list_head *pos, *tmp;
+        int err = 0;
+        ENTRY;
+
+        LASSERT(qdata);
+        qunit_sz =
+            (qdata->qd_isblk) ? qctxt->lqc_bunit_sz : qctxt->lqc_iunit_sz;
+        LASSERT(!(qdata->qd_count % qunit_sz));
+
+        /* update local operational quota file */
+        if (rc == 0) {
+                __u32 count = QUSG(qdata->qd_count, qdata->qd_isblk);
+                struct obd_quotactl *qctl = NULL;
+
+                OBD_ALLOC(qctl, sizeof(*qctl));
+                if (qctl == NULL)
+                        GOTO(out, err = -ENOMEM);
+
+                /* acq/rel qunit for specified uid/gid is serialized,
+                 * so there is no race between get fs quota limit and
+                 * set fs quota limit */
+                qctl->qc_cmd = Q_GETQUOTA;
+                qctl->qc_id = qdata->qd_id;
+                qctl->qc_type = qdata->qd_type;
+                err = fsfilt_quotactl(obd, sb, qctl);
+                if (err) {
+                        CERROR("error get quota fs limit! (rc:%d)\n", err);
+                        GOTO(out_mem, err);
+                }
+
+                /* adjust only the hard limit selected by qd_isblk; dqb_valid
+                 * names the limits being changed */
+                switch (opc) {
+                case QUOTA_DQACQ:
+                        if (qdata->qd_isblk) {
+                                qctl->qc_dqblk.dqb_valid = QIF_BLIMITS;
+                                INC_QLIMIT(qctl->qc_dqblk.dqb_bhardlimit,
+                                           count);
+                        } else {
+                                qctl->qc_dqblk.dqb_valid = QIF_ILIMITS;
+                                INC_QLIMIT(qctl->qc_dqblk.dqb_ihardlimit,
+                                           count);
+                        }
+                        break;
+                case QUOTA_DQREL:
+                        /* a release must leave a non-zero limit behind */
+                        if (qdata->qd_isblk) {
+                                LASSERT(count < qctl->qc_dqblk.dqb_bhardlimit);
+                                qctl->qc_dqblk.dqb_valid = QIF_BLIMITS;
+                                qctl->qc_dqblk.dqb_bhardlimit -= count;
+                        } else {
+                                LASSERT(count < qctl->qc_dqblk.dqb_ihardlimit);
+                                qctl->qc_dqblk.dqb_valid = QIF_ILIMITS;
+                                qctl->qc_dqblk.dqb_ihardlimit -= count;
+                        }
+                        break;
+                default:
+                        LBUG();
+                        break;
+                }
+
+                /* clear quota limit */
+                if (count == 0) {
+                        if (qdata->qd_isblk)
+                                qctl->qc_dqblk.dqb_bhardlimit = 0;
+                        else
+                                qctl->qc_dqblk.dqb_ihardlimit = 0;
+                }
+
+                qctl->qc_cmd = Q_SETQUOTA;
+                err = fsfilt_quotactl(obd, sb, qctl);
+                if (err)
+                        CERROR("error set quota fs limit! (rc:%d)\n", err);
+
+                QDATA_DEBUG(qdata, "%s completion\n",
+                            opc == QUOTA_DQACQ ? "DQACQ" : "DQREL");
+out_mem:
+                OBD_FREE(qctl, sizeof(*qctl));
+        } else if (rc == -EDQUOT) {
+                CWARN("acquire qunit got EDQUOT\n");
+        } else {
+                CERROR("acquire qunit got error! (rc:%d)\n", rc);
+        }
+out:
+        /* remove the qunit from hash */
+        spin_lock(&qunit_hash_lock);
+
+        qunit = dqacq_in_flight(qctxt, qdata);
+
+        LASSERT(qunit);
+        LASSERT(opc == qunit->lq_opc);
+        remove_qunit_nolock(qunit);
+
+        /* wake up all waiters */
+        list_for_each_safe(pos, tmp, &qunit->lq_waiters) {
+                struct qunit_waiter *qw = list_entry(pos, struct qunit_waiter,
+                                                     qw_entry);
+                /* unlinking the entry is what got_qunit() checks for */
+                list_del_init(&qw->qw_entry);
+                qw->qw_rc = rc;
+                wake_up(&qw->qw_waitq);
+        }
+
+        spin_unlock(&qunit_hash_lock);
+
+        qunit_put(qunit);
+        RETURN(err);
+}
+
+/* Arguments that dqacq_interpret() receives through its data pointer. */
+struct dqacq_async_args {
+        struct lustre_quota_ctxt *aa_ctxt;      /* quota context of the request */
+        struct lustre_qunit *aa_qunit;          /* qunit the reply is checked against */
+};
+
+/*
+ * Reply callback for a DQACQ/DQREL request queued by schedule_dqacq().
+ *
+ * @req:  the completed request
+ * @data: the struct dqacq_async_args stored in the request
+ * @rc:   status of the request as passed in by the caller
+ *
+ * dqacq_completion() is always run so that the qunit is removed from the
+ * hash and its waiters are woken, even when no usable reply body exists.
+ * Returns -EPROTO when the request succeeded but carried no reply body,
+ * otherwise the return value of dqacq_completion().
+ */
+static int dqacq_interpret(struct ptlrpc_request *req, void *data, int rc)
+{
+        struct dqacq_async_args *aa = (struct dqacq_async_args *)data;
+        struct lustre_quota_ctxt *qctxt = aa->aa_ctxt;
+        struct lustre_qunit *qunit = aa->aa_qunit;
+        struct obd_device *obd = req->rq_import->imp_obd;
+        struct qunit_data *qdata = NULL;
+        int bad_reply = 0;
+        ENTRY;
+
+        qdata = lustre_swab_repbuf(req, 0, sizeof(*qdata), lustre_swab_qdata);
+        if (qdata == NULL) {
+                /* No reply body: never dereference it.  Fall back to the
+                 * data recorded in the qunit so that completion can still
+                 * find and unhash the qunit, and make sure completion sees
+                 * a failure. */
+                bad_reply = (rc == 0);
+                if (bad_reply)
+                        rc = -EPROTO;
+                qdata = &qunit->lq_data;
+        } else {
+                LASSERT(qdata->qd_id == qunit->lq_data.qd_id &&
+                        qdata->qd_type == qunit->lq_data.qd_type &&
+                        (qdata->qd_count == qunit->lq_data.qd_count ||
+                         qdata->qd_count == 0));
+        }
+
+        QDATA_DEBUG(qdata, "%s interpret rc(%d).\n",
+                    req->rq_reqmsg->opc == QUOTA_DQACQ ? "DQACQ" : "DQREL", rc);
+
+        rc = dqacq_completion(obd, qctxt, qdata, rc, req->rq_reqmsg->opc);
+
+        /* keep reporting a missing reply body as a protocol error */
+        RETURN(bad_reply ? -EPROTO : rc);
+}
+
+/* Wait condition used by schedule_dqacq(): non-zero once @waiter is no
+ * longer linked on any list.  The check is made under qunit_hash_lock. */
+static int got_qunit(struct qunit_waiter *waiter)
+{
+        int unlinked;
+        ENTRY;
+
+        spin_lock(&qunit_hash_lock);
+        unlinked = list_empty(&waiter->qw_entry);
+        spin_unlock(&qunit_hash_lock);
+
+        RETURN(unlinked);
+}
+
+/*
+ * Start a DQACQ/DQREL (@opc) for the id described by @qdata unless one is
+ * already in flight, and optionally wait for an in-flight one.
+ *
+ * - A request is in flight and @wait is set: queue on its waiter list and
+ *   sleep until dqacq_completion() unlinks the waiter.  Returns -EAGAIN if
+ *   the completed request reported success (qw_rc == 0), 0 otherwise.
+ * - A request is in flight and @wait is clear: nothing is done, returns 0.
+ * - Nothing is in flight: a new qunit is hashed, then the request is either
+ *   served synchronously through qctxt->lqc_handler (when set) or handed to
+ *   ptlrpcd with dqacq_interpret() as the reply callback.
+ *
+ * NOTE(review): a newly created qunit never gets this caller's waiter
+ * queued on it, so with @wait set the final wait returns immediately with
+ * -EAGAIN.  This is left as it was; confirm it is intended.
+ */
+static int
+schedule_dqacq(struct obd_device *obd,
+               struct lustre_quota_ctxt *qctxt,
+               struct qunit_data *qdata, int opc, int wait)
+{
+        struct lustre_qunit *qunit = NULL;
+        struct qunit_waiter qw;
+        struct l_wait_info lwi = { 0 };
+        int rc = 0;
+        ENTRY;
+
+        INIT_LIST_HEAD(&qw.qw_entry);
+        init_waitqueue_head(&qw.qw_waitq);
+        qw.qw_rc = 0;
+
+        spin_lock(&qunit_hash_lock);
+
+        qunit = dqacq_in_flight(qctxt, qdata);
+        if (qunit && wait) {
+                list_add_tail(&qw.qw_entry, &qunit->lq_waiters);
+                spin_unlock(&qunit_hash_lock);
+                goto wait_completion;
+        } else if (qunit && !wait) {
+                qunit = NULL;
+        } else if (!qunit && (qunit = alloc_qunit(qctxt, qdata, opc)) != NULL)
+                insert_qunit_nolock(qctxt, qunit);
+
+        spin_unlock(&qunit_hash_lock);
+
+        if (qunit) {
+                struct ptlrpc_request *req;
+                struct qunit_data *reqdata;
+                struct dqacq_async_args *aa;
+                int size = sizeof(*reqdata);
+
+                /* master is going to dqacq/dqrel from itself */
+                if (qctxt->lqc_handler) {
+                        int rc2;
+                        QDATA_DEBUG(qdata, "local %s.\n",
+                                    opc == QUOTA_DQACQ ? "DQACQ" : "DQREL");
+                        rc = qctxt->lqc_handler(obd, qdata, opc);
+                        rc2 = dqacq_completion(obd, qctxt, qdata, rc, opc);
+                        RETURN((rc && rc != -EDQUOT) ? rc : rc2);
+                }
+
+                /* build dqacq/dqrel request */
+                LASSERT(qctxt->lqc_import);
+                req = ptlrpc_prep_req(qctxt->lqc_import, opc, 1, &size, NULL);
+                if (!req) {
+                        /* The qunit is already hashed as "in flight" but no
+                         * request will ever complete it.  Run the completion
+                         * path with the error so that it is unhashed and
+                         * released and any waiter queued meanwhile is
+                         * woken, instead of leaving it stuck forever. */
+                        dqacq_completion(obd, qctxt, qdata, -ENOMEM, opc);
+                        RETURN(-ENOMEM);
+                }
+
+                reqdata = lustre_msg_buf(req->rq_reqmsg, 0, sizeof(*reqdata));
+                memcpy(reqdata, qdata, sizeof(*reqdata));
+                size = sizeof(*reqdata);
+                req->rq_replen = lustre_msg_size(1, &size);
+
+                LASSERT(sizeof(*aa) <= sizeof(req->rq_async_args));
+                aa = (struct dqacq_async_args *)&req->rq_async_args;
+                aa->aa_ctxt = qctxt;
+                aa->aa_qunit = qunit;
+
+                req->rq_interpret_reply = dqacq_interpret;
+                ptlrpcd_add_req(req);
+
+                QDATA_DEBUG(qdata, "%s scheduled.\n",
+                            opc == QUOTA_DQACQ ? "DQACQ" : "DQREL");
+        }
+wait_completion:
+        if (wait && qunit) {
+                /* No reference on the qunit is held here and the hash lock
+                 * has been dropped, so the completion path may already have
+                 * released it: only the pointer value is tested and the
+                 * caller's @qdata is used for the debug output. */
+                QDATA_DEBUG(qdata, "wait for dqacq.\n");
+
+                l_wait_event(qw.qw_waitq, got_qunit(&qw), &lwi);
+                if (qw.qw_rc == 0)
+                        rc = -EAGAIN;
+
+                QDATA_DEBUG(qdata, "wait dqacq done. (rc:%d)\n", qw.qw_rc);
+        }
+        RETURN(rc);
+}
+
+/*
+ * For both the user (@uid) and the group (@gid) id, ask check_cur_qunit()
+ * whether quota has to be acquired or released and, if so, schedule the
+ * matching request without waiting for it.  @isblk selects block or file
+ * quota.  Returns 0 when quota is not enabled on the superblock, otherwise
+ * the first non-zero result of schedule_dqacq() (0 if there was none).
+ */
+int
+qctxt_adjust_qunit(struct obd_device *obd, struct lustre_quota_ctxt *qctxt,
+                   uid_t uid, gid_t gid, __u32 isblk)
+{
+        struct qunit_data qdata[MAXQUOTAS];
+        int type = USRQUOTA;
+        int first_err = 0;
+        ENTRY;
+
+        if (!sb_any_quota_enabled(qctxt->lqc_sb))
+                RETURN(0);
+
+        qdata[USRQUOTA].qd_type = USRQUOTA;
+        qdata[USRQUOTA].qd_id = uid;
+        qdata[USRQUOTA].qd_isblk = isblk;
+        qdata[USRQUOTA].qd_count = 0;
+
+        qdata[GRPQUOTA].qd_type = GRPQUOTA;
+        qdata[GRPQUOTA].qd_id = gid;
+        qdata[GRPQUOTA].qd_isblk = isblk;
+        qdata[GRPQUOTA].qd_count = 0;
+
+        do {
+                int need = check_cur_qunit(obd, qctxt, &qdata[type]);
+
+                if (need > 0) {
+                        /* 1 means acquire, any other positive value means
+                         * release */
+                        int opc = (need == 1) ? QUOTA_DQACQ : QUOTA_DQREL;
+                        int ret;
+
+                        ret = schedule_dqacq(obd, qctxt, &qdata[type], opc, 0);
+                        if (first_err == 0)
+                                first_err = ret;
+                }
+        } while (++type < MAXQUOTAS);
+
+        RETURN(first_err);
+}
+
+/*
+ * For both the user (@uid) and the group (@gid) id, call schedule_dqacq()
+ * in waiting mode for a DQACQ whenever check_cur_qunit() returns a positive
+ * value.  Returns 0 when quota is not enabled on the superblock; otherwise
+ * the result of the last schedule_dqacq() call, or -EAGAIN if none was made.
+ */
+int
+qctxt_wait_on_dqacq(struct obd_device *obd, struct lustre_quota_ctxt *qctxt,
+                    uid_t uid, gid_t gid, __u32 isblk)
+{
+        struct qunit_data qdata[MAXQUOTAS];
+        int type = USRQUOTA;
+        int rc = -EAGAIN;
+        ENTRY;
+
+        if (!sb_any_quota_enabled(qctxt->lqc_sb))
+                RETURN(0);
+
+        qdata[USRQUOTA].qd_type = USRQUOTA;
+        qdata[USRQUOTA].qd_id = uid;
+        qdata[USRQUOTA].qd_isblk = isblk;
+        qdata[USRQUOTA].qd_count = 0;
+
+        qdata[GRPQUOTA].qd_type = GRPQUOTA;
+        qdata[GRPQUOTA].qd_id = gid;
+        qdata[GRPQUOTA].qd_isblk = isblk;
+        qdata[GRPQUOTA].qd_count = 0;
+
+        do {
+                if (check_cur_qunit(obd, qctxt, &qdata[type]) > 0)
+                        rc = schedule_dqacq(obd, qctxt, &qdata[type],
+                                            QUOTA_DQACQ, 1);
+        } while (++type < MAXQUOTAS);
+
+        RETURN(rc);
+}
+
+/*
+ * Take a ptlrpcd reference and fill in @qctxt: record @sb and @handler,
+ * clear the import and the flags, and load the default unit/tune sizes.
+ * Returns the ptlrpcd_addref() error if that fails, 0 otherwise.
+ */
+int
+qctxt_init(struct lustre_quota_ctxt *qctxt, struct super_block *sb,
+           dqacq_handler_t handler)
+{
+        int err;
+        ENTRY;
+
+        err = ptlrpcd_addref();
+        if (err != 0)
+                RETURN(err);
+
+        qctxt->lqc_sb = sb;
+        qctxt->lqc_handler = handler;
+        qctxt->lqc_import = NULL;
+        qctxt->lqc_flags = 0;
+
+        /* block quota sizes */
+        qctxt->lqc_bunit_sz = default_bunit_sz;
+        qctxt->lqc_btune_sz = default_btune_sz;
+        /* file quota sizes */
+        qctxt->lqc_iunit_sz = default_iunit_sz;
+        qctxt->lqc_itune_sz = default_itune_sz;
+
+        RETURN(0);
+}
+
+/*
+ * Drop the ptlrpcd reference and assert, under qunit_hash_lock, that no
+ * qunit left in the hash still belongs to @qctxt.  @force is not used.
+ */
+void qctxt_cleanup(struct lustre_quota_ctxt *qctxt, int force)
+{
+        struct list_head *node;
+        int bucket;
+        ENTRY;
+
+        ptlrpcd_decref();
+
+        spin_lock(&qunit_hash_lock);
+        for (bucket = 0; bucket < NR_DQHASH; bucket++) {
+                /* nothing is unlinked while walking, a plain walk is enough */
+                list_for_each(node, &qunit_hash[bucket]) {
+                        struct lustre_qunit *qunit;
+
+                        qunit = list_entry(node, struct lustre_qunit, lq_hash);
+                        LASSERT(qunit->lq_ctxt != qctxt);
+                }
+        }
+        spin_unlock(&qunit_hash_lock);
+
+        EXIT;
+}
diff --git a/lustre/mds/quota_master.c b/lustre/mds/quota_master.c
new file mode 100644 (file)
index 0000000..df67fa0
--- /dev/null
@@ -0,0 +1,752 @@
+/* -*- mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*-
+ * vim:expandtab:shiftwidth=8:tabstop=8:
+ *
+ *  lustre/mds/quota_master.c
+ *  Lustre Quota Master request handler
+ *
+ *  Copyright (c) 2001-2003 Cluster File Systems, Inc.
+ *   Author: Niu YaWei <niu@clusterfs.com>
+ *
+ *   This file is part of Lustre, http://www.lustre.org.
+ *
+ *   Lustre is free software; you can redistribute it and/or
+ *   modify it under the terms of version 2 of the GNU General Public
+ *   License as published by the Free Software Foundation.
+ *
+ *   Lustre is distributed in the hope that it will be useful,
+ *   but WITHOUT ANY WARRANTY; without even the implied warranty of
+ *   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ *   GNU General Public License for more details.
+ *
+ *   You should have received a copy of the GNU General Public License
+ *   along with Lustre; if not, write to the Free Software
+ *   Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
+ */
+#ifndef EXPORT_SYMTAB
+# define EXPORT_SYMTAB
+#endif
+
+#define DEBUG_SUBSYSTEM S_MDS
+
+#include <linux/version.h>
+#include <linux/fs.h>
+#include <asm/unistd.h>
+#include <linux/slab.h>
+#include <linux/quotaops.h>
+#include <linux/module.h>
+#include <linux/init.h>
+#include <linux/quota.h>
+
+#include <linux/obd_class.h>
+#include <linux/lustre_quota.h>
+#include <linux/lustre_fsfilt.h>
+#include <linux/lustre_mds.h>
+
+#include "mds_internal.h"
+
+/* Hash table of struct lustre_dquot, NR_DQHASH buckets chained through
+ * dq_hash; the bucket heads are initialized in lustre_dquot_init(). */
+static struct list_head lustre_dquot_hash[NR_DQHASH];
+/* Taken by lustre_dqget() and lustre_dqput() around hash lookups, inserts
+ * and removals. */
+static spinlock_t dquot_hash_lock = SPIN_LOCK_UNLOCKED;
+
+/* Slab cache for struct lustre_dquot; created in lustre_dquot_init() and
+ * destroyed in lustre_dquot_exit(). */
+kmem_cache_t *lustre_dquot_cachep;
+
+/*
+ * Create the lustre_dquot slab cache and initialize every bucket of the
+ * dquot hash table.  Asserts that the cache does not exist yet.
+ * Returns 0 on success, -ENOMEM if the cache cannot be created.
+ */
+int lustre_dquot_init(void)
+{
+        int i;
+        ENTRY;
+
+        LASSERT(lustre_dquot_cachep == NULL);
+        lustre_dquot_cachep = kmem_cache_create("lustre_dquot_cache",
+                                                sizeof(struct lustre_dquot),
+                                                0, 0, NULL, NULL);
+        if (!lustre_dquot_cachep)
+                RETURN(-ENOMEM);        /* pair ENTRY with RETURN on all paths */
+
+        for (i = 0; i < NR_DQHASH; i++) {
+                INIT_LIST_HEAD(lustre_dquot_hash + i);
+        }
+        RETURN(0);
+}
+
+/*
+ * Tear down what lustre_dquot_init() set up: assert that every hash bucket
+ * is empty, then destroy the slab cache if it exists.
+ */
+void lustre_dquot_exit(void)
+{
+        int i;
+        ENTRY;
+        /* FIXME cleanup work ?? */
+
+        for (i = 0; i < NR_DQHASH; i++) {
+                LASSERT(list_empty(lustre_dquot_hash + i));
+        }
+        if (lustre_dquot_cachep) {
+                /* Destroy the cache outside the assertion so that the side
+                 * effect does not depend on how the assertion macro treats
+                 * its argument. */
+                int rc = kmem_cache_destroy(lustre_dquot_cachep);
+
+                LASSERTF(rc == 0, "Cannot destroy lustre_dquot_cache\n");
+                lustre_dquot_cachep = NULL;
+        }
+        EXIT;
+}
+
+/* Map (@info, @id, @type) to a bucket index of lustre_dquot_hash: the
+ * shifted @info address is mixed with @id, scaled by (MAXQUOTAS - @type)
+ * and reduced modulo NR_DQHASH. */
+static inline int const dquot_hashfn(struct lustre_quota_info *info,
+                                     unsigned int id, int type)
+{
+        unsigned long key = (unsigned long)info >> L1_CACHE_SHIFT;
+
+        key ^= id;
+        key *= (MAXQUOTAS - type);
+        return key % NR_DQHASH;
+}
+
+/* Search bucket @hashent for the dquot matching (@lqi, @id, @type).
+ * Returns it, or NULL if there is none.  No locking and no reference
+ * counting is done here. */
+static struct lustre_dquot *find_dquot(int hashent,
+                                       struct lustre_quota_info *lqi, qid_t id,
+                                       int type)
+{
+        struct list_head *bucket = &lustre_dquot_hash[hashent];
+        struct list_head *pos;
+        ENTRY;
+
+        list_for_each(pos, bucket) {
+                struct lustre_dquot *cand;
+
+                cand = list_entry(pos, struct lustre_dquot, dq_hash);
+                if (cand->dq_info != lqi)
+                        continue;
+                if (cand->dq_id == id && cand->dq_type == type)
+                        RETURN(cand);
+        }
+        RETURN(NULL);
+}
+
+/* Allocate a dquot from lustre_dquot_cachep for (@lqi, @id, @type) with a
+ * reference count of one, unlinked list heads and an initialized dq_sem.
+ * Returns NULL if the allocation fails. */
+static struct lustre_dquot *alloc_dquot(struct lustre_quota_info *lqi,
+                                        qid_t id, int type)
+{
+        struct lustre_dquot *dq = NULL;
+        ENTRY;
+
+        OBD_SLAB_ALLOC(dq, lustre_dquot_cachep, SLAB_NOFS, sizeof(*dq));
+        if (dq != NULL) {
+                /* identity */
+                dq->dq_info = lqi;
+                dq->dq_id = id;
+                dq->dq_type = type;
+
+                /* bookkeeping */
+                INIT_LIST_HEAD(&dq->dq_hash);
+                INIT_LIST_HEAD(&dq->dq_unused);
+                sema_init(&dq->dq_sem, 1);
+                atomic_set(&dq->dq_refcnt, 1);
+        }
+
+        RETURN(dq);
+}
+
+/* Give @dquot back to lustre_dquot_cachep. */
+static void free_dquot(struct lustre_dquot *dquot)
+{
+        OBD_SLAB_FREE(dquot, lustre_dquot_cachep, sizeof(*dquot));
+}
+
+/* Link @dquot at the head of the bucket selected by dquot_hashfn().
+ * No locking is done here. */
+static void insert_dquot_nolock(struct lustre_dquot *dquot)
+{
+        int bucket;
+
+        bucket = dquot_hashfn(dquot->dq_info, dquot->dq_id, dquot->dq_type);
+        list_add(&dquot->dq_hash, &lustre_dquot_hash[bucket]);
+}
+
+/* Unlink @dquot from its hash bucket; asserts that it is currently linked.
+ * No locking is done here (lustre_dqput() calls this with dquot_hash_lock
+ * held). */
+static void remove_dquot_nolock(struct lustre_dquot *dquot)
+{
+        LASSERT(!list_empty(&dquot->dq_hash));
+        /* list_del_init leaves dq_hash self-linked, i.e. list_empty() again */
+        list_del_init(&dquot->dq_hash);
+}
+
+/* Drop one reference on @dquot under dquot_hash_lock; when the count
+ * reaches zero the dquot is unhashed and freed. */
+static void lustre_dqput(struct lustre_dquot *dquot)
+{
+        int last_ref;
+        ENTRY;
+
+        spin_lock(&dquot_hash_lock);
+
+        LASSERT(atomic_read(&dquot->dq_refcnt));
+        last_ref = atomic_dec_and_test(&dquot->dq_refcnt);
+        if (last_ref) {
+                remove_dquot_nolock(dquot);
+                free_dquot(dquot);
+        }
+
+        spin_unlock(&dquot_hash_lock);
+
+        EXIT;
+}
+
+#define DQUOT_DEBUG(dquot, fmt, arg...)                                       \
+        CDEBUG(D_QUOTA, "refcnt(%u) id(%u) type(%u) off("LPX64") flags(%lu) " \
+               "bhardlimit(%u) curspace("LPX64") ihardlimit(%u) "             \
+               "curinodes(%u): " fmt, atomic_read(&dquot->dq_refcnt),         \
+               dquot->dq_id, dquot->dq_type, dquot->dq_off,  dquot->dq_flags, \
+               dquot->dq_dqb.dqb_bhardlimit, dquot->dq_dqb.dqb_curspace,      \
+               dquot->dq_dqb.dqb_ihardlimit, dquot->dq_dqb.dqb_curinodes,     \
+               ## arg);                                                       \
+
+#define QINFO_DEBUG(qinfo, fmt, arg...)                                       \
+        CDEBUG(D_QUOTA, "files (%p/%p) flags(%lu/%lu) blocks(%u/%u) "         \
+               "free_blk(/%u/%u) free_entry(%u/%u): " fmt,                    \
+               qinfo->qi_files[0], qinfo->qi_files[1],                        \
+               qinfo->qi_info[0].dqi_flags, qinfo->qi_info[1].dqi_flags,      \
+               qinfo->qi_info[0].dqi_blocks, qinfo->qi_info[1].dqi_blocks,    \
+               qinfo->qi_info[0].dqi_free_blk, qinfo->qi_info[1].dqi_free_blk,\
+               qinfo->qi_info[0].dqi_free_entry,                              \
+               qinfo->qi_info[1].dqi_free_entry, ## arg);
+
+/*
+ * Get a referenced dquot for (@lqi, @id, @type).
+ *
+ * An existing hash entry gets its reference count raised.  Otherwise a new
+ * dquot is allocated, hashed, and filled in through fsfilt_dquot() with
+ * QFILE_RD_DQUOT while holding qi_sem and dq_sem.
+ *
+ * Returns the dquot, ERR_PTR(-ENOMEM) if allocation fails, or ERR_PTR(rc)
+ * if the read fails (the new dquot is released again in that case).
+ * Drop the reference with lustre_dqput().
+ *
+ * NOTE(review): a second caller can find a freshly hashed dquot before the
+ * read below has filled it in -- confirm callers tolerate that.
+ */
+static struct lustre_dquot *lustre_dqget(struct obd_device *obd,
+                                         struct lustre_quota_info *lqi,
+                                         qid_t id, int type)
+{
+        unsigned int hashent = dquot_hashfn(lqi, id, type);
+        struct lustre_dquot *dquot = NULL;
+        struct lustre_dquot *empty = NULL;
+        int read = 0;
+        ENTRY;
+
+        spin_lock(&dquot_hash_lock);
+        dquot = find_dquot(hashent, lqi, id, type);
+        if (dquot == NULL) {
+                /* alloc_dquot() allocates with SLAB_NOFS and may sleep, so
+                 * it must not run under the spinlock: drop the lock,
+                 * allocate, then look again in case somebody else hashed
+                 * the same dquot in the meantime. */
+                spin_unlock(&dquot_hash_lock);
+
+                empty = alloc_dquot(lqi, id, type);
+                if (empty == NULL)
+                        RETURN(ERR_PTR(-ENOMEM));
+
+                spin_lock(&dquot_hash_lock);
+                dquot = find_dquot(hashent, lqi, id, type);
+        }
+
+        if (dquot != NULL) {
+                atomic_inc(&dquot->dq_refcnt);
+        } else {
+                dquot = empty;
+                empty = NULL;
+                insert_dquot_nolock(dquot);
+                read = 1;
+        }
+        spin_unlock(&dquot_hash_lock);
+
+        /* lost the race: the spare allocation was never published */
+        if (empty != NULL)
+                free_dquot(empty);
+
+        if (read) {
+                int rc = 0;
+
+                down(&dquot->dq_info->qi_sem);
+                down(&dquot->dq_sem);
+                rc = fsfilt_dquot(obd, dquot, QFILE_RD_DQUOT);
+                up(&dquot->dq_sem);
+                up(&dquot->dq_info->qi_sem);
+                if (rc) {
+                        CERROR("can't read dquot from admin qutoafile! "
+                               "(rc:%d)\n", rc);
+                        lustre_dqput(dquot);
+                        RETURN(ERR_PTR(rc));
+                }
+        }
+        RETURN(dquot);
+}
+
+/*
+ * Master-side handler for a quota acquire (QUOTA_DQACQ) or release
+ * (QUOTA_DQREL) of qdata->qd_count for the id/type in @qdata;
+ * qdata->qd_isblk selects block or file quota.
+ *
+ * If the hard limit in the admin quota file is zero, qd_count is set to 0
+ * and 0 is returned.  An acquire that would push the usage over the limit
+ * returns -EDQUOT without changing anything.  Otherwise the usage counter
+ * is updated and written back with QFILE_WR_DQUOT, and the result of that
+ * write is returned.  Any other @opc is a bug (LBUG).
+ */
+int dqacq_handler(struct obd_device *obd, struct qunit_data *qdata, int opc)
+{
+        struct mds_obd *mds = &obd->u.mds;
+        struct lustre_quota_info *info = &mds->mds_quota_info;
+        struct lustre_dquot *dquot = NULL;
+        __u64 usage;
+        __u32 limit;
+        int rc = 0;
+        ENTRY;
+
+        dquot = lustre_dqget(obd, info, qdata->qd_id, qdata->qd_type);
+        if (IS_ERR(dquot))
+                RETURN(PTR_ERR(dquot));
+
+        DQUOT_DEBUG(dquot, "get dquot in dqacq_handler\n");
+        QINFO_DEBUG(dquot->dq_info, "get dquot in dqadq_handler\n");
+
+        down(&dquot->dq_info->qi_sem);
+        down(&dquot->dq_sem);
+
+        /* Work on copies and store the result back into the right field
+         * below.  The inode counter must not be accessed through a __u64
+         * pointer: it is not a __u64 field, so doing that reads and writes
+         * whatever lies next to it. */
+        if (qdata->qd_isblk) {
+                usage = dquot->dq_dqb.dqb_curspace;
+                limit = dquot->dq_dqb.dqb_bhardlimit;
+        } else {
+                usage = dquot->dq_dqb.dqb_curinodes;
+                limit = dquot->dq_dqb.dqb_ihardlimit;
+        }
+
+        /* if the quota limit in admin quotafile is zero, we just inform
+         * slave to clear quota limit with zero qd_count */
+        if (limit == 0) {
+                qdata->qd_count = 0;
+                GOTO(out, rc);
+        }
+        if (opc == QUOTA_DQACQ) {
+                if (QUSG(usage + qdata->qd_count, qdata->qd_isblk) > limit)
+                        GOTO(out, rc = -EDQUOT);
+                usage += qdata->qd_count;
+        } else if (opc == QUOTA_DQREL) {
+                /* The counters are unsigned, so "usage - count >= 0" can
+                 * never fail.  Detect the underflow explicitly and clamp
+                 * instead of wrapping to a huge usage. */
+                if (usage < qdata->qd_count) {
+                        CERROR("release exceeds usage! usage("LPX64") "
+                               "count("LPX64")\n", usage,
+                               (__u64)qdata->qd_count);
+                        usage = 0;
+                } else {
+                        usage -= qdata->qd_count;
+                }
+        } else {
+                LBUG();
+        }
+
+        if (qdata->qd_isblk)
+                dquot->dq_dqb.dqb_curspace = usage;
+        else
+                dquot->dq_dqb.dqb_curinodes = usage;
+
+        rc = fsfilt_dquot(obd, dquot, QFILE_WR_DQUOT);
+out:
+        up(&dquot->dq_sem);
+        up(&dquot->dq_info->qi_sem);
+        lustre_dqput(dquot);
+        RETURN(rc);
+}
+
+/*
+ * Re-balance the MDS-local quota units: file quota for the child's owner
+ * (@cuid/@cgid) and block quota for the parent directory's owner
+ * (@puid/@pgid).  Nothing is done when @rc is an error other than -EDQUOT.
+ * Failures are only logged.
+ */
+void mds_adjust_qunit(struct obd_device *obd, uid_t cuid, gid_t cgid,
+                      uid_t puid, gid_t pgid, int rc)
+{
+        struct lustre_quota_ctxt *qctxt = &obd->u.mds.mds_quota_ctxt;
+        int err;
+        ENTRY;
+
+        if (rc == 0 || rc == -EDQUOT) {
+                /* dqacq/dqrel file quota on owner of child */
+                err = qctxt_adjust_qunit(obd, qctxt, cuid, cgid, 0);
+                if (err)
+                        CERROR("error mds adjust child qunit! (rc:%d)\n", err);
+
+                /* dqacq/dqrel block quota on owner of parent directory */
+                err = qctxt_adjust_qunit(obd, qctxt, puid, pgid, 1);
+                if (err)
+                        CERROR("error mds adjust parent qunit! (rc:%d)\n", err);
+        }
+
+        EXIT;
+}
+
+/*
+ * Create and initialize the admin quota files, under the MDS objects
+ * directory, for every quota type selected in @oqctl.
+ *
+ * A type whose admin quota file is currently open is skipped with a
+ * warning; a type whose file already exists is skipped silently.  For the
+ * others the file is created, initialized with QFILE_INIT_INFO and closed
+ * again.  Runs with the obd context pushed and qi_sem held.
+ *
+ * Returns 0 on success or the first error encountered, at which point the
+ * remaining types are not processed.
+ */
+int init_admin_quotafiles(struct obd_device *obd, struct obd_quotactl *oqctl)
+{
+        struct mds_obd *mds = &obd->u.mds;
+        struct lustre_quota_info *qinfo = &mds->mds_quota_info;
+        const char *quotafiles[] = LUSTRE_ADMIN_QUOTAFILES;
+        struct obd_run_ctxt saved;
+        char name[64];
+        int i, rc = 0;
+        struct dentry *dparent = mds->mds_objects_dir;
+        struct inode *iparent = dparent->d_inode;
+        ENTRY;
+
+        LASSERT(iparent);
+        push_ctxt(&saved, &obd->obd_ctxt, NULL);
+
+        down(&qinfo->qi_sem);
+        for (i = 0; i < MAXQUOTAS; i++) {
+                struct dentry *de = NULL;
+                struct file *fp = NULL;
+
+                if (!Q_TYPESET(oqctl, i))
+                        continue;
+
+                /* quota file has been opened ? */
+                if (qinfo->qi_files[i]) {
+                        CWARN("init %s admin quotafile while quota on.\n",
+                              i == USRQUOTA ? "user" : "group");
+                        continue;
+                }
+
+                /* Build the path before anything can fail: it is printed
+                 * by the lookup error message below and used by
+                 * filp_open() afterwards. */
+                snprintf(name, sizeof(name), "OBJECTS/%s", quotafiles[i]);
+
+                /* lookup quota file */
+                rc = 0;
+                down(&iparent->i_sem);
+
+                de = lookup_one_len(quotafiles[i], dparent,
+                                    strlen(quotafiles[i]));
+                if (IS_ERR(de) || de->d_inode == NULL)
+                        rc = IS_ERR(de) ? PTR_ERR(de) : -ENOENT;
+                if (!IS_ERR(de))
+                        dput(de);
+                up(&iparent->i_sem);
+
+                if (rc && rc != -ENOENT) {
+                        CERROR("error lookup quotafile %s! (rc:%d)\n",
+                               name, rc);
+                        break;
+                } else if (!rc) {
+                        /* file already exists, leave it alone */
+                        continue;
+                }
+
+                LASSERT(rc == -ENOENT);
+                /* create quota file */
+                fp = filp_open(name, O_CREAT | O_EXCL, 0644);
+                if (IS_ERR(fp)) {
+                        rc = PTR_ERR(fp);
+                        CERROR("error creating admin quotafile %s (rc:%d)\n",
+                               name, rc);
+                        break;
+                }
+
+                /* the file is only published in qi_files[] for the duration
+                 * of the init call */
+                qinfo->qi_files[i] = fp;
+                rc = fsfilt_quotainfo(obd, qinfo, i, QFILE_INIT_INFO);
+                filp_close(fp, 0);
+                qinfo->qi_files[i] = NULL;
+
+                if (rc) {
+                        CERROR("error init %s admin quotafile! (rc:%d)\n",
+                               i == USRQUOTA ? "user" : "group", rc);
+                        break;
+                }
+        }
+        up(&qinfo->qi_sem);
+
+        pop_ctxt(&saved, &obd->obd_ctxt, NULL);
+        RETURN(rc);
+}
+
+/*
+ * Open the admin quota file of every quota type selected in @oqctl and
+ * read its quota-file info.
+ *
+ * Returns 0 on success, -EBUSY as soon as a selected type already has its
+ * file open, or the open/read error.  On an error other than -EBUSY the
+ * files of all selected types are closed again.
+ */
+int mds_quota_on(struct obd_device *obd, struct obd_quotactl *oqctl)
+{
+        const char *quotafiles[] = LUSTRE_ADMIN_QUOTAFILES;
+        struct mds_obd *mds = &obd->u.mds;
+        struct lustre_quota_info *qinfo = &mds->mds_quota_info;
+        struct inode *iparent = mds->mds_objects_dir->d_inode;
+        struct obd_run_ctxt saved;
+        char name[64];
+        int type, rc = 0;
+        ENTRY;
+
+        LASSERT(iparent);
+        push_ctxt(&saved, &obd->obd_ctxt, NULL);
+
+        down(&qinfo->qi_sem);
+        /* open admin quota files and read quotafile info */
+        for (type = 0; type < MAXQUOTAS; type++) {
+                struct file *filp;
+
+                if (!Q_TYPESET(oqctl, type))
+                        continue;
+
+                sprintf(name, "OBJECTS/%s", quotafiles[type]);
+
+                if (qinfo->qi_files[type] != NULL) {
+                        rc = -EBUSY;
+                        break;
+                }
+
+                filp = filp_open(name, O_RDWR | O_EXCL, 0644);
+                if (IS_ERR(filp)) {
+                        rc = PTR_ERR(filp);
+                        CERROR("error open %s! (rc:%d)\n", name, rc);
+                        break;
+                }
+                qinfo->qi_files[type] = filp;
+
+                rc = fsfilt_quotainfo(obd, qinfo, type, QFILE_RD_INFO);
+                if (rc) {
+                        CERROR("error read quotainfo of %s! (rc:%d)\n",
+                               name, rc);
+                        break;
+                }
+        }
+        up(&qinfo->qi_sem);
+
+        pop_ctxt(&saved, &obd->obd_ctxt, NULL);
+
+        /* success and "already on" need no undo */
+        if (rc == 0 || rc == -EBUSY)
+                RETURN(rc);
+
+        /* undo: close whatever is open for the selected types */
+        down(&qinfo->qi_sem);
+        for (type = 0; type < MAXQUOTAS; type++) {
+                if (!Q_TYPESET(oqctl, type))
+                        continue;
+                if (qinfo->qi_files[type])
+                        filp_close(qinfo->qi_files[type], 0);
+                qinfo->qi_files[type] = NULL;
+        }
+        up(&qinfo->qi_sem);
+
+        RETURN(rc);
+}
+
+/*
+ * Close the admin quota file of every quota type selected in @oqctl.
+ * Returns -ESRCH if at least one selected type had no file open (the other
+ * types are still processed), 0 otherwise.
+ */
+int mds_quota_off(struct obd_device *obd, struct obd_quotactl *oqctl)
+{
+        struct lustre_quota_info *qinfo = &obd->u.mds.mds_quota_info;
+        int type, rc = 0;
+        ENTRY;
+
+        down(&qinfo->qi_sem);
+        /* close admin quota files */
+        for (type = 0; type < MAXQUOTAS; type++) {
+                struct file *filp;
+
+                if (!Q_TYPESET(oqctl, type))
+                        continue;
+
+                filp = qinfo->qi_files[type];
+                if (filp != NULL) {
+                        filp_close(filp, 0);
+                        qinfo->qi_files[type] = NULL;
+                } else {
+                        rc = -ESRCH;
+                }
+        }
+        up(&qinfo->qi_sem);
+
+        RETURN(rc);
+}
+
+/*
+ * Copy the block/inode grace times and the flags from oqctl->qc_dqinfo into
+ * the in-memory quota-file info of type oqctl->qc_type, then write that
+ * info out with QFILE_WR_INFO.
+ * Returns -ESRCH if the admin quota file of that type is not open,
+ * otherwise the result of the write.
+ */
+int mds_set_dqinfo(struct obd_device *obd, struct obd_quotactl *oqctl)
+{
+        struct lustre_quota_info *qinfo = &obd->u.mds.mds_quota_info;
+        struct obd_dqinfo *src = &oqctl->qc_dqinfo;
+        int rc;
+        ENTRY;
+
+        if (!qinfo->qi_files[oqctl->qc_type])
+                RETURN(-ESRCH);
+
+        down(&qinfo->qi_sem);
+
+        qinfo->qi_info[oqctl->qc_type].dqi_bgrace = src->dqi_bgrace;
+        qinfo->qi_info[oqctl->qc_type].dqi_igrace = src->dqi_igrace;
+        qinfo->qi_info[oqctl->qc_type].dqi_flags = src->dqi_flags;
+        rc = fsfilt_quotainfo(obd, qinfo, oqctl->qc_type, QFILE_WR_INFO);
+
+        up(&qinfo->qi_sem);
+
+        RETURN(rc);
+}
+
+/*
+ * Copy the block/inode grace times and the flags of quota type
+ * oqctl->qc_type from the in-memory quota-file info into oqctl->qc_dqinfo.
+ * Returns -ESRCH if the admin quota file of that type is not open,
+ * 0 otherwise.
+ */
+int mds_get_dqinfo(struct obd_device *obd, struct obd_quotactl *oqctl)
+{
+        struct lustre_quota_info *qinfo = &obd->u.mds.mds_quota_info;
+        struct obd_dqinfo *dst = &oqctl->qc_dqinfo;
+        ENTRY;
+
+        if (!qinfo->qi_files[oqctl->qc_type])
+                RETURN(-ESRCH);
+
+        down(&qinfo->qi_sem);
+
+        dst->dqi_bgrace = qinfo->qi_info[oqctl->qc_type].dqi_bgrace;
+        dst->dqi_igrace = qinfo->qi_info[oqctl->qc_type].dqi_igrace;
+        dst->dqi_flags = qinfo->qi_info[oqctl->qc_type].dqi_flags;
+
+        up(&qinfo->qi_sem);
+
+        RETURN(0);
+}
+
+/*
+ * Seed the local file (inode) quota for the id/type in @oqctl: set the
+ * local hard limit to MIN_QLIMIT, then trigger a file-quota qunit
+ * adjustment for that id.  Nothing is done when the requested inode hard
+ * limit is zero.
+ * Returns 0, -ENOMEM, or the error of the failing step.
+ */
+static int mds_init_slave_ilimits(struct obd_device *obd,
+                                  struct obd_quotactl *oqctl)
+{
+        /* XXX: for file limits only adjust local now */
+        struct mds_obd *mds = &obd->u.mds;
+        struct obd_quotactl *local_qc;
+        unsigned int uid = 0, gid = 0;
+        int rc;
+        ENTRY;
+
+        /* if we are going to set zero limit, needn't init slaves */
+        if (oqctl->qc_dqblk.dqb_ihardlimit == 0)
+                RETURN(0);
+
+        OBD_ALLOC(local_qc, sizeof(*local_qc));
+        if (local_qc == NULL)
+                RETURN(-ENOMEM);
+
+        local_qc->qc_cmd = Q_SETQUOTA;
+        local_qc->qc_type = oqctl->qc_type;
+        local_qc->qc_id = oqctl->qc_id;
+        local_qc->qc_dqblk.dqb_valid = QIF_ILIMITS;
+        local_qc->qc_dqblk.dqb_ihardlimit = MIN_QLIMIT;
+
+        /* only the id matching the quota type is filled in, the other
+         * one stays 0 */
+        if (oqctl->qc_type == USRQUOTA)
+                uid = oqctl->qc_id;
+        else
+                gid = oqctl->qc_id;
+
+        /* set local limit to MIN_QLIMIT */
+        rc = fsfilt_quotactl(obd, mds->mds_sb, local_qc);
+        if (rc)
+                GOTO(out, rc);
+
+        /* trigger local qunit pre-acquire */
+        rc = qctxt_adjust_qunit(obd, &mds->mds_quota_ctxt, uid, gid, 0);
+        if (rc) {
+                CERROR("error mds adjust local file quota! (rc:%d)\n", rc);
+                GOTO(out, rc);
+        }
+        /* FIXME initialize all slaves in CMD */
+out:
+        OBD_FREE(local_qc, sizeof(*local_qc));
+        RETURN(rc);
+}
+
+/*
+ * Seed the block quota for the id/type in @oqctl: set the local hard limit
+ * to MIN_QLIMIT, trigger a block-quota qunit adjustment for that id, then
+ * send Q_INITQUOTA through the MDS's OSC export.  Nothing is done when the
+ * requested block hard limit is zero.
+ * Returns 0, -ENOMEM, or the error of the failing step.
+ */
+static int mds_init_slave_blimits(struct obd_device *obd,
+                                  struct obd_quotactl *oqctl)
+{
+        struct mds_obd *mds = &obd->u.mds;
+        struct obd_quotactl *init_qc;
+        unsigned int uid = 0, gid = 0;
+        int rc;
+        ENTRY;
+
+        /* if we are going to set zero limit, needn't init slaves */
+        if (oqctl->qc_dqblk.dqb_bhardlimit == 0)
+                RETURN(0);
+
+        OBD_ALLOC(init_qc, sizeof(*init_qc));
+        if (init_qc == NULL)
+                RETURN(-ENOMEM);
+
+        init_qc->qc_cmd = Q_SETQUOTA;
+        init_qc->qc_type = oqctl->qc_type;
+        init_qc->qc_id = oqctl->qc_id;
+        init_qc->qc_dqblk.dqb_valid = QIF_BLIMITS;
+        init_qc->qc_dqblk.dqb_bhardlimit = MIN_QLIMIT;
+
+        /* only the id matching the quota type is filled in, the other
+         * one stays 0 */
+        if (oqctl->qc_type == USRQUOTA)
+                uid = oqctl->qc_id;
+        else
+                gid = oqctl->qc_id;
+
+        /* set local limit to MIN_QLIMIT */
+        rc = fsfilt_quotactl(obd, mds->mds_sb, init_qc);
+        if (rc)
+                GOTO(out, rc);
+
+        /* trigger local qunit pre-acquire */
+        rc = qctxt_adjust_qunit(obd, &mds->mds_quota_ctxt, uid, gid, 1);
+        if (rc) {
+                CERROR("error mds adjust local block quota! (rc:%d)\n", rc);
+                GOTO(out, rc);
+        }
+
+        /* initialize all slave's limit */
+        init_qc->qc_cmd = Q_INITQUOTA;
+        rc = obd_quotactl(mds->mds_osc_exp, init_qc);
+out:
+        OBD_FREE(init_qc, sizeof(*init_qc));
+        RETURN(rc);
+}
+
+/*
+ * Apply the limits and grace times flagged in oqctl->qc_dqblk.dqb_valid to
+ * the admin dquot of oqctl->qc_id / oqctl->qc_type and write it out with
+ * QFILE_WR_DQUOT.
+ *
+ * When an inode or block hard limit is being set and its previous value was
+ * zero, the matching mds_init_slave_*limits() helper is run afterwards; if
+ * that fails, the saved limits and grace times are put back and written
+ * out again.
+ *
+ * Returns -ESRCH if the admin quota file of that type is not open, the
+ * lustre_dqget() error, the error of the write, or the error of the slave
+ * initialization; 0 on success.
+ *
+ * NOTE(review): the rollback restores limits and times only.  Usage
+ * counters cleared because a limit was set to zero are not restored, and
+ * the result of the rollback write is ignored -- confirm this is intended.
+ */
+int mds_set_dqblk(struct obd_device *obd, struct obd_quotactl *oqctl)
+{
+        struct mds_obd *mds = &obd->u.mds;
+        struct lustre_quota_info *qinfo = &mds->mds_quota_info;
+        __u32 ihardlimit, isoftlimit, bhardlimit, bsoftlimit;
+        time_t btime, itime;
+        struct lustre_dquot *dquot;
+        struct obd_dqblk *dqblk = &oqctl->qc_dqblk;
+        int rc = 0;
+        ENTRY;
+
+        if (qinfo->qi_files[oqctl->qc_type] == NULL)
+                RETURN(-ESRCH);
+
+        dquot = lustre_dqget(obd, qinfo, oqctl->qc_id, oqctl->qc_type);
+        if (IS_ERR(dquot))
+                RETURN(PTR_ERR(dquot));
+        DQUOT_DEBUG(dquot, "get dquot in mds_set_blk\n");
+        QINFO_DEBUG(dquot->dq_info, "get dquot in mds_set_blk\n");
+
+        down(&dquot->dq_info->qi_sem);
+        down(&dquot->dq_sem);
+
+        /* remember the current settings for the rollback at revoke_out */
+        ihardlimit = dquot->dq_dqb.dqb_ihardlimit;
+        isoftlimit = dquot->dq_dqb.dqb_isoftlimit;
+        bhardlimit = dquot->dq_dqb.dqb_bhardlimit;
+        bsoftlimit = dquot->dq_dqb.dqb_bsoftlimit;
+        btime = dquot->dq_dqb.dqb_btime;
+        itime = dquot->dq_dqb.dqb_itime;
+
+        if (dqblk->dqb_valid & QIF_BLIMITS) {
+                dquot->dq_dqb.dqb_bhardlimit = dqblk->dqb_bhardlimit;
+                dquot->dq_dqb.dqb_bsoftlimit = dqblk->dqb_bsoftlimit;
+                /* clear usage (limit pool) */
+                if (dquot->dq_dqb.dqb_bhardlimit == 0)
+                        dquot->dq_dqb.dqb_curspace = 0;
+        }
+
+        if (dqblk->dqb_valid & QIF_ILIMITS) {
+                dquot->dq_dqb.dqb_ihardlimit = dqblk->dqb_ihardlimit;
+                dquot->dq_dqb.dqb_isoftlimit = dqblk->dqb_isoftlimit;
+                /* clear usage (limit pool) */
+                if (dquot->dq_dqb.dqb_ihardlimit == 0)
+                        dquot->dq_dqb.dqb_curinodes = 0;
+        }
+
+        if (dqblk->dqb_valid & QIF_BTIME)
+                dquot->dq_dqb.dqb_btime = dqblk->dqb_btime;
+
+        if (dqblk->dqb_valid & QIF_ITIME)
+                dquot->dq_dqb.dqb_itime = dqblk->dqb_itime;
+
+        rc = fsfilt_dquot(obd, dquot, QFILE_WR_DQUOT);
+
+        up(&dquot->dq_sem);
+        up(&dquot->dq_info->qi_sem);
+
+        /* a failed write skips both the slave init and the rollback */
+        if (rc)
+                GOTO(out, rc);
+
+        /* slaves are only initialized when the previous hard limit was 0 */
+        if (dqblk->dqb_valid & QIF_ILIMITS && !ihardlimit) {
+                rc = mds_init_slave_ilimits(obd, oqctl);
+                if (rc) {
+                        CERROR("init slave ilimits failed! (rc:%d)\n", rc);
+                        GOTO(revoke_out, rc);
+                }
+        }
+
+        if (dqblk->dqb_valid & QIF_BLIMITS && !bhardlimit) {
+                rc = mds_init_slave_blimits(obd, oqctl);
+                if (rc) {
+                        CERROR("init slave blimits failed! (rc:%d)\n", rc);
+                        GOTO(revoke_out, rc);
+                }
+        }
+
+revoke_out:
+        if (rc) {
+                /* cancel previous setting */
+                down(&dquot->dq_info->qi_sem);
+                down(&dquot->dq_sem);
+                dquot->dq_dqb.dqb_ihardlimit = ihardlimit;
+                dquot->dq_dqb.dqb_isoftlimit = isoftlimit;
+                dquot->dq_dqb.dqb_bhardlimit = bhardlimit;
+                dquot->dq_dqb.dqb_bsoftlimit = bsoftlimit;
+                dquot->dq_dqb.dqb_btime = btime;
+                dquot->dq_dqb.dqb_itime = itime;
+                fsfilt_dquot(obd, dquot, QFILE_WR_DQUOT);
+                up(&dquot->dq_sem);
+                up(&dquot->dq_info->qi_sem);
+        }
+out:
+        lustre_dqput(dquot);
+        RETURN(rc);
+}
+
+/*
+ * Forward the quota command in @oqctl (cmd, id and type only) through the
+ * MDS's OSC export and copy the returned dqb_curspace into
+ * oqctl->qc_dqblk.  The value is copied regardless of the call's result.
+ * Returns -ENOMEM if the temporary request cannot be allocated, otherwise
+ * the result of obd_quotactl().
+ */
+static int mds_get_space(struct obd_device *obd, struct obd_quotactl *oqctl)
+{
+        struct obd_quotactl *soqc;
+        int rc;
+        ENTRY;  /* RETURN() below needs the matching ENTRY */
+
+        OBD_ALLOC(soqc, sizeof(*soqc));
+        if (!soqc)
+                RETURN(-ENOMEM);
+
+        soqc->qc_cmd = oqctl->qc_cmd;
+        soqc->qc_id = oqctl->qc_id;
+        soqc->qc_type = oqctl->qc_type;
+
+        rc = obd_quotactl(obd->u.mds.mds_osc_exp, soqc);
+
+        oqctl->qc_dqblk.dqb_curspace = soqc->qc_dqblk.dqb_curspace;
+
+        OBD_FREE(soqc, sizeof(*soqc));
+        RETURN(rc);
+}
+
+/*
+ * Fill oqctl->qc_dqblk for oqctl->qc_id / oqctl->qc_type: limits and grace
+ * times come from the admin dquot (read under dq_sem), the inode usage is
+ * reported as 0 and the space usage is taken from mds_get_space().
+ * Returns -ESRCH if the admin quota file of that type is not open, the
+ * lustre_dqget() error, or the result of mds_get_space().
+ */
+int mds_get_dqblk(struct obd_device *obd, struct obd_quotactl *oqctl)
+{
+        struct lustre_quota_info *qinfo = &obd->u.mds.mds_quota_info;
+        struct obd_dqblk *out = &oqctl->qc_dqblk;
+        struct lustre_dquot *dquot;
+        int rc;
+        ENTRY;
+
+        if (!qinfo->qi_files[oqctl->qc_type])
+                RETURN(-ESRCH);
+
+        dquot = lustre_dqget(obd, qinfo, oqctl->qc_id, oqctl->qc_type);
+        if (IS_ERR(dquot))
+                RETURN(PTR_ERR(dquot));
+
+        down(&dquot->dq_sem);
+        /* block side */
+        out->dqb_bhardlimit = dquot->dq_dqb.dqb_bhardlimit;
+        out->dqb_bsoftlimit = dquot->dq_dqb.dqb_bsoftlimit;
+        out->dqb_btime = dquot->dq_dqb.dqb_btime;
+        /* inode side */
+        out->dqb_ihardlimit = dquot->dq_dqb.dqb_ihardlimit;
+        out->dqb_isoftlimit = dquot->dq_dqb.dqb_isoftlimit;
+        out->dqb_itime = dquot->dq_dqb.dqb_itime;
+        up(&dquot->dq_sem);
+
+        /* the usages in admin quota file is inaccurate */
+        out->dqb_curinodes = 0;
+        out->dqb_curspace = 0;
+        rc = mds_get_space(obd, oqctl);
+
+        lustre_dqput(dquot);
+        RETURN(rc);
+}
index a99c3f2..b3e0607 100644 (file)
@@ -395,6 +395,9 @@ void *obd_psdev = NULL;
 
 EXPORT_SYMBOL(obd_dev);
 EXPORT_SYMBOL(obdo_cachep);
+EXPORT_SYMBOL(qunit_cachep);
+EXPORT_SYMBOL(qunit_hash_lock);
+EXPORT_SYMBOL(qunit_hash);
 EXPORT_SYMBOL(obd_fail_loc);
 EXPORT_SYMBOL(ll_set_rdonly);
 EXPORT_SYMBOL(ll_clear_rdonly);
@@ -418,6 +421,7 @@ EXPORT_SYMBOL(class_name2obd);
 EXPORT_SYMBOL(class_uuid2dev);
 EXPORT_SYMBOL(class_uuid2obd);
 EXPORT_SYMBOL(class_find_client_obd);
+EXPORT_SYMBOL(class_find_client_notype);
 EXPORT_SYMBOL(class_devices_in_group);
 EXPORT_SYMBOL(__class_export_put);
 EXPORT_SYMBOL(class_new_export);
index d5d4334..db8500f 100644 (file)
 #include <linux/kmod.h>   /* for request_module() */
 #include <linux/module.h>
 #include <linux/obd_class.h>
+#include <linux/lustre_mds.h>
+#include <linux/obd_ost.h>
 #include <linux/random.h>
 #include <linux/slab.h>
 #include <linux/pagemap.h>
+#include <linux/quota.h>
 #else
 #include <liblustre.h>
 #include <linux/obd_class.h>
 #include <linux/obd.h>
 #endif
 #include <linux/lprocfs_status.h>
+#include <linux/lustre_quota.h>
 
 extern struct list_head obd_types;
 static spinlock_t obd_types_lock = SPIN_LOCK_UNLOCKED;
 kmem_cache_t *obdo_cachep = NULL;
 kmem_cache_t *import_cachep = NULL;
 
+kmem_cache_t *qunit_cachep = NULL;
+struct list_head qunit_hash[NR_DQHASH];
+spinlock_t qunit_hash_lock = SPIN_LOCK_UNLOCKED;
+
 int (*ptlrpc_put_connection_superhack)(struct ptlrpc_connection *c);
 void (*ptlrpc_abort_inflight_superhack)(struct obd_import *imp);
 
@@ -309,6 +317,18 @@ struct obd_device * class_find_client_obd(struct obd_uuid *tgt_uuid,
         return NULL;
 }
 
+/* Find a client obd for tgt_uuid without the caller naming its type.
+ * An MDC lookup (with a NULL group uuid) is tried first; if it yields
+ * nothing, the result of an OSC lookup using grp_uuid is returned. */
+struct obd_device *class_find_client_notype(struct obd_uuid *tgt_uuid,
+                                            struct obd_uuid *grp_uuid)
+{
+        struct obd_device *found;
+
+        found = class_find_client_obd(tgt_uuid, LUSTRE_MDC_NAME, NULL);
+        if (found != NULL)
+                return found;
+
+        return class_find_client_obd(tgt_uuid, LUSTRE_OSC_NAME, grp_uuid);
+}
+
 /* Iterate the obd_device list looking devices have grp_uuid. Start
    searching at *next, and if a device is found, the next index to look
    at is saved in *next. If next is NULL, then the first matching device
@@ -341,6 +361,23 @@ struct obd_device * class_devices_in_group(struct obd_uuid *grp_uuid, int *next)
         return NULL;
 }
 
+/* Destroy the qunit slab cache after asserting that every bucket of the
+ * qunit hash table is empty.  Does nothing if the cache was never
+ * created. */
+static void obd_cleanup_qunit_cache(void)
+{
+        int i, rc;
+        ENTRY;
+
+        /* obd_cleanup_caches() doubles as the error path of
+         * obd_init_caches(), so this can run before obd_init_qunit_cache()
+         * has called INIT_LIST_HEAD() on the buckets.  list_empty() on a
+         * zero-filled head is false and would trip the LASSERT below.  The
+         * buckets are only initialized once the cache exists, so a NULL
+         * cache means there is nothing to verify or destroy. */
+        if (qunit_cachep == NULL) {
+                EXIT;
+                return;
+        }
+
+        spin_lock(&qunit_hash_lock);
+        for (i = 0; i < NR_DQHASH; i++)
+                LASSERT(list_empty(qunit_hash + i));
+        spin_unlock(&qunit_hash_lock);
+
+        /* keep the destroy call out of the assertion expression so the
+         * side effect does not depend on how LASSERTF is compiled */
+        rc = kmem_cache_destroy(qunit_cachep);
+        LASSERTF(rc == 0, "Cannot destroy ll_qunit_cache\n");
+        qunit_cachep = NULL;
+        EXIT;
+}
 
 void obd_cleanup_caches(void)
 {
@@ -355,12 +392,34 @@ void obd_cleanup_caches(void)
                          "Cannot destory ll_import_cache\n");
                 import_cachep = NULL;
         }
+        obd_cleanup_qunit_cache();
         EXIT;
 }
 
+/* Create the "ll_qunit_cache" slab cache for struct lustre_qunit and then
+ * initialize every bucket of the qunit hash table under qunit_hash_lock.
+ * Returns 0 on success, -ENOMEM if the cache cannot be created. */
+static int obd_init_qunit_cache(void)
+{
+        struct list_head *bucket;
+        ENTRY;
+
+        LASSERT(qunit_cachep == NULL);
+        qunit_cachep = kmem_cache_create("ll_qunit_cache",
+                                         sizeof(struct lustre_qunit),
+                                         0, 0, NULL, NULL);
+        if (qunit_cachep == NULL)
+                RETURN(-ENOMEM);
+
+        spin_lock(&qunit_hash_lock);
+        for (bucket = qunit_hash; bucket < qunit_hash + NR_DQHASH; bucket++)
+                INIT_LIST_HEAD(bucket);
+        spin_unlock(&qunit_hash_lock);
+
+        RETURN(0);
+}
+
 int obd_init_caches(void)
 {
+        int rc = 0;
         ENTRY;
+
         LASSERT(obdo_cachep == NULL);
         obdo_cachep = kmem_cache_create("ll_obdo_cache", sizeof(struct obdo),
                                         0, 0, NULL, NULL);
@@ -374,6 +433,10 @@ int obd_init_caches(void)
         if (!import_cachep)
                 GOTO(out, -ENOMEM);
 
+        rc = obd_init_qunit_cache();
+        if (rc)
+                GOTO(out, rc);
+
         RETURN(0);
  out:
         obd_cleanup_caches();
index f364a21..3c05a97 100644 (file)
@@ -110,6 +110,7 @@ static int llog_check_cb(struct llog_handle *handle, struct llog_rec_hdr *rec,
                 case OST_SZ_REC:
                 case OST_RAID1_REC:
                 case MDS_UNLINK_REC:
+                case MDS_SETATTR_REC:
                 case OBD_CFG_REC:
                 case PTL_CFG_REC:
                 case LLOG_HDR_MAGIC: {
index 2cae0a7..6c45e8d 100644 (file)
@@ -304,8 +304,9 @@ static int llog_lvfs_write_rec(struct llog_handle *loghandle,
         if (rc == 0 && reccookie) {
                 reccookie->lgc_lgl = loghandle->lgh_id;
                 reccookie->lgc_index = index;
-                if (rec->lrh_type == MDS_UNLINK_REC)
-                        reccookie->lgc_subsys = LLOG_UNLINK_ORIG_CTXT;
+                if ((rec->lrh_type == MDS_UNLINK_REC) || 
+                                (rec->lrh_type == MDS_SETATTR_REC))
+                        reccookie->lgc_subsys = LLOG_MDS_OST_ORIG_CTXT;
                 else if (rec->lrh_type == OST_SZ_REC)
                         reccookie->lgc_subsys = LLOG_SIZE_ORIG_CTXT;
                 else if (rec->lrh_type == OST_RAID1_REC)
index c22316c..0796c50 100644 (file)
@@ -92,7 +92,7 @@ EXPORT_SYMBOL(llog_sync);
 
 int llog_add(struct llog_ctxt *ctxt, struct llog_rec_hdr *rec,
                 struct lov_stripe_md *lsm, struct llog_cookie *logcookies,
-                int numcookies)
+                int numcookies, llog_fill_rec_cb_t fill_cb)
 {
         int rc;
         ENTRY;
@@ -104,7 +104,7 @@ int llog_add(struct llog_ctxt *ctxt, struct llog_rec_hdr *rec,
         
         CTXT_CHECK_OP(ctxt, add, -EOPNOTSUPP);
 
-        rc = CTXTP(ctxt, add)(ctxt, rec, lsm, logcookies, numcookies);
+        rc = CTXTP(ctxt, add)(ctxt, rec, lsm, logcookies, numcookies, fill_cb);
         RETURN(rc);
 }
 EXPORT_SYMBOL(llog_add);
@@ -269,7 +269,8 @@ EXPORT_SYMBOL(llog_obd_origin_cleanup);
 /* add for obdfilter/sz and mds/unlink */
 int llog_obd_origin_add(struct llog_ctxt *ctxt,
                         struct llog_rec_hdr *rec, struct lov_stripe_md *lsm,
-                        struct llog_cookie *logcookies, int numcookies)
+                        struct llog_cookie *logcookies, int numcookies,
+                        llog_fill_rec_cb_t fill_cb)
 {
         struct llog_handle *cathandle;
         int rc;
index 3a2ae51..5b13389 100644 (file)
@@ -107,6 +107,17 @@ void lustre_swab_llog_rec(struct llog_rec_hdr *rec, struct llog_rec_tail *tail)
                 break;
         }
 
+        case MDS_SETATTR_REC: {
+                struct llog_setattr_rec *lsr = (struct llog_setattr_rec *)rec;
+                                                                                                                             
+                __swab64s(&lsr->lsr_oid);
+                __swab32s(&lsr->lsr_ogen);
+                __swab32s(&lsr->lsr_uid);
+                __swab32s(&lsr->lsr_gid);
+                                                                                                                             
+                break;
+        }
+
         case OBD_CFG_REC:
         case PTL_CFG_REC:
                 /* these are swabbed as they are consumed */
index 398b462..1b663e9 100644 (file)
@@ -607,7 +607,7 @@ int lprocfs_alloc_obd_stats(struct obd_device *obd, unsigned num_private_stats)
         LASSERT(obd->obd_proc_entry != NULL);
         LASSERT(obd->obd_cntr_base == 0);
 
-        num_stats = 1 + OBD_COUNTER_OFFSET(notify) +
+        num_stats = 1 + OBD_COUNTER_OFFSET(quotactl) +
                 num_private_stats;
         stats = lprocfs_alloc_stats(num_stats);
         if (stats == NULL)
@@ -634,6 +634,7 @@ int lprocfs_alloc_obd_stats(struct obd_device *obd, unsigned num_private_stats)
         LPROCFS_OBD_OP_INIT(num_private_stats, stats, create);
         LPROCFS_OBD_OP_INIT(num_private_stats, stats, destroy);
         LPROCFS_OBD_OP_INIT(num_private_stats, stats, setattr);
+        LPROCFS_OBD_OP_INIT(num_private_stats, stats, setattr_async);
         LPROCFS_OBD_OP_INIT(num_private_stats, stats, getattr);
         LPROCFS_OBD_OP_INIT(num_private_stats, stats, getattr_async);
         LPROCFS_OBD_OP_INIT(num_private_stats, stats, brw);
@@ -667,6 +668,8 @@ int lprocfs_alloc_obd_stats(struct obd_device *obd, unsigned num_private_stats)
         LPROCFS_OBD_OP_INIT(num_private_stats, stats, unpin);
         LPROCFS_OBD_OP_INIT(num_private_stats, stats, import_event);
         LPROCFS_OBD_OP_INIT(num_private_stats, stats, notify);
+        LPROCFS_OBD_OP_INIT(num_private_stats, stats, quotacheck);
+        LPROCFS_OBD_OP_INIT(num_private_stats, stats, quotactl);
 
         for (i = num_private_stats; i < num_stats; i++) {
                 /* If this LBUGs, it is likely that an obd
index 3e0871a..eef89e8 100644 (file)
@@ -1,4 +1,5 @@
 MODULES := obdfilter
+MDS :=  @top_srcdir@/lustre/mds/
 obdfilter-objs := filter.o filter_io.o filter_log.o filter_san.o
 obdfilter-objs += lproc_obdfilter.o filter_lvb.o
 
@@ -6,6 +7,7 @@ ifeq ($(PATCHLEVEL),4)
 obdfilter-objs += filter_io_24.o
 else
 obdfilter-objs += filter_io_26.o
+obdfilter-objs += $(MDS)quota_context.o
 endif # PATCHLEVEL 
 
 @INCLUDE_RULES@
index a870d67..e93cc25 100644 (file)
@@ -41,6 +41,8 @@
 #include <linux/dcache.h>
 #include <linux/init.h>
 #include <linux/version.h>
+#include <linux/sched.h>
+#include <linux/quotaops.h>
 #if (LINUX_VERSION_CODE > KERNEL_VERSION(2,5,0))
 # include <linux/mount.h>
 # include <linux/buffer_head.h>
@@ -54,6 +56,7 @@
 #include <linux/lustre_log.h>
 #include <linux/lustre_commit_confd.h>
 #include <libcfs/list.h>
+#include <linux/lustre_quota.h>
 
 #include "filter_internal.h"
 
@@ -1049,11 +1052,9 @@ static int filter_destroy_internal(struct obd_device *obd, obd_id objid,
         }
 
         rc = vfs_unlink(dparent->d_inode, dchild);
-
         if (rc)
                 CERROR("error unlinking objid %.*s: rc %d\n",
                        dchild->d_name.len, dchild->d_name.name, rc);
-
         RETURN(rc);
 }
 
@@ -1261,6 +1262,8 @@ int filter_common_setup(struct obd_device *obd, obd_count len, void *buf,
         spin_lock_init(&filter->fo_w_disk_iosize.oh_lock);
         filter->fo_readcache_max_filesize = FILTER_MAX_CACHE_SIZE;
 
+        atomic_set(&filter->fo_quotachecking, 1);
+
         sprintf(ns_name, "filter-%s", obd->obd_uuid.uuid);
         obd->obd_namespace = ldlm_namespace_new(ns_name, LDLM_NAMESPACE_SERVER);
         if (obd->obd_namespace == NULL)
@@ -1300,6 +1303,13 @@ int filter_common_setup(struct obd_device *obd, obd_count len, void *buf,
                               obd->obd_replayable ? "enabled" : "disabled");
         }
 
+        rc = qctxt_init(&filter->fo_quota_ctxt, filter->fo_sb, NULL);
+        if (rc) {
+                CERROR("initialize quota context failed! (rc:%d)\n", rc);
+                qctxt_cleanup(&filter->fo_quota_ctxt, 0);
+                GOTO(err_post, rc);
+        }
+
         RETURN(0);
 
 err_post:
@@ -1368,6 +1378,8 @@ static int filter_cleanup(struct obd_device *obd)
                 }
         }
 
+        qctxt_cleanup(&filter->fo_quota_ctxt, 0);
+
         ldlm_namespace_free(obd->obd_namespace, obd->obd_force);
 
         if (filter->fo_sb == NULL)
@@ -1380,6 +1392,8 @@ static int filter_cleanup(struct obd_device *obd)
         filter_post(obd);
 
         shrink_dcache_parent(filter->fo_sb->s_root);
+        
+        DQUOT_OFF(filter->fo_sb);
 
         if (atomic_read(&filter->fo_vfsmnt->mnt_count) > 1)
                 CERROR("%s: mount point %p busy, mnt_count: %d\n",
@@ -1612,7 +1626,7 @@ static int filter_disconnect(struct obd_export *exp)
         fsfilt_sync(obd, obd->u.filter.fo_sb);
 
         /* flush any remaining cancel messages out to the target */
-        ctxt = llog_get_context(obd, LLOG_UNLINK_REPL_CTXT);
+        ctxt = llog_get_context(obd, LLOG_MDS_OST_REPL_CTXT);
         err = llog_sync(ctxt, exp);
         if (err)
                 CERROR("error flushing logs to MDS: rc %d\n", err);
@@ -1678,33 +1692,51 @@ static int filter_getattr(struct obd_export *exp, struct obdo *oa,
 static int filter_setattr(struct obd_export *exp, struct obdo *oa,
                           struct lov_stripe_md *md, struct obd_trans_info *oti)
 {
+        struct obd_device *obd;
         struct obd_run_ctxt saved;
         struct filter_obd *filter;
         struct dentry *dentry;
         struct iattr iattr;
+        uid_t orig_uid = 0;
+        gid_t orig_gid = 0;
         struct ldlm_res_id res_id = { .name = { oa->o_id } };
         struct ldlm_resource *res;
         void *handle;
+        struct llog_cookie *fcc = NULL;
         int rc, rc2;
         ENTRY;
 
-        LASSERT(oti != NULL);
-
         dentry = filter_oa2dentry(exp->exp_obd, oa);
         if (IS_ERR(dentry))
                 RETURN(PTR_ERR(dentry));
 
-        filter = &exp->exp_obd->u.filter;
+        obd = exp->exp_obd;
+        filter = &obd->u.filter;
 
         iattr_from_obdo(&iattr, oa, oa->o_valid);
 
         push_ctxt(&saved, &exp->exp_obd->obd_ctxt, NULL);
         lock_kernel();
 
+        if (oa->o_valid & OBD_MD_FLCOOKIE) {
+                OBD_ALLOC(fcc, sizeof(*fcc));
+                if (fcc != NULL)
+                        memcpy(fcc, obdo_logcookie(oa), sizeof(*fcc));
+        }
+
         if (iattr.ia_valid & ATTR_SIZE)
                 down(&dentry->d_inode->i_sem);
-        handle = fsfilt_start(exp->exp_obd, dentry->d_inode, FSFILT_OP_SETATTR,
-                              oti);
+
+        if (iattr.ia_valid & (ATTR_UID | ATTR_GID)) {
+                orig_uid = dentry->d_inode->i_uid;
+                orig_gid = dentry->d_inode->i_gid;
+                handle = fsfilt_start_log(exp->exp_obd, dentry->d_inode, 
+                                          FSFILT_OP_SETATTR, oti, 1);
+        } else {
+                handle = fsfilt_start(exp->exp_obd, dentry->d_inode,
+                                      FSFILT_OP_SETATTR, oti);
+        }
+
         if (IS_ERR(handle))
                 GOTO(out_unlock, rc = PTR_ERR(handle));
 
@@ -1713,8 +1745,21 @@ static int filter_setattr(struct obd_export *exp, struct obdo *oa,
                 rc = fsfilt_iocontrol(exp->exp_obd, dentry->d_inode, NULL,
                                       EXT3_IOC_SETFLAGS,
                                       (long)&iattr.ia_attr_flags);
-        else
+        else {
                 rc = fsfilt_setattr(exp->exp_obd, dentry, handle, &iattr, 1);
+                /* set cancel cookie callback function */  
+                if (fcc != NULL) {
+                        if (oti != NULL)
+                                fsfilt_add_journal_cb(obd, 0, oti->oti_handle,
+                                                      filter_cancel_cookies_cb,
+                                                      fcc);
+                        else
+                                fsfilt_add_journal_cb(obd, 0, handle,
+                                                      filter_cancel_cookies_cb,
+                                                      fcc);
+                }
+        }
+
         rc = filter_finish_transno(exp, oti, rc);
         rc2 = fsfilt_commit(exp->exp_obd, dentry->d_inode, handle, 0);
         if (rc2) {
@@ -1737,7 +1782,9 @@ static int filter_setattr(struct obd_export *exp, struct obdo *oa,
         }
 
         oa->o_valid = OBD_MD_FLID;
-        obdo_from_inode(oa, dentry->d_inode, FILTER_VALID_FLAGS);
+        /* Quota release need uid/gid info */
+        obdo_from_inode(oa, dentry->d_inode, 
+                        FILTER_VALID_FLAGS | OBD_MD_FLUID | OBD_MD_FLGID);
 
 out_unlock:
         if (iattr.ia_valid & ATTR_SIZE)
@@ -1746,6 +1793,19 @@ out_unlock:
         pop_ctxt(&saved, &exp->exp_obd->obd_ctxt, NULL);
 
         f_dput(dentry);
+
+        /* trigger quota release */
+        if (rc == 0 && iattr.ia_valid & (ATTR_SIZE | ATTR_UID | ATTR_GID)) {
+                rc2 = qctxt_adjust_qunit(obd, &filter->fo_quota_ctxt, 
+                                         oa->o_uid, oa->o_gid, 1);
+                if (rc2)
+                        CERROR("error filter adjust qunit! (rc:%d)\n", rc2);
+                /* after owner changed, release quota for the original owner */
+                rc2 = qctxt_adjust_qunit(obd, &filter->fo_quota_ctxt,
+                                         orig_uid, orig_gid, 1);
+                if (rc2)
+                        CERROR("error filter adjust qunit! (rc:%d)\n", rc2);
+        }
         RETURN(rc);
 }
 
@@ -2026,7 +2086,7 @@ static int filter_precreate(struct obd_device *obd, struct obdo *oa,
                         GOTO(cleanup, rc = PTR_ERR(handle));
                 cleanup_phase = 3;
 
-                rc = ll_vfs_create(dparent->d_inode, dchild, S_IFREG, NULL);
+                rc = ll_vfs_create(dparent->d_inode, dchild, S_IFREG | 0666, NULL);
                 if (rc) {
                         CERROR("create failed rc = %d\n", rc);
                         GOTO(cleanup, rc);
@@ -2216,6 +2276,8 @@ static int filter_destroy(struct obd_export *exp, struct obdo *oa,
                         memcpy(fcc, obdo_logcookie(oa), sizeof(*fcc));
         }
 
+        /* Quota release need uid/gid of inode */
+        obdo_from_inode(oa, dchild->d_inode, OBD_MD_FLUID|OBD_MD_FLGID);
         rc = filter_destroy_internal(obd, oa->o_id, dparent, dchild);
 
 cleanup:
@@ -2250,6 +2312,14 @@ cleanup:
                 LBUG();
         }
 
+        /* trigger quota release */
+        if (rc == 0) {
+                rc2 = qctxt_adjust_qunit(obd, &filter->fo_quota_ctxt, 
+                                         oa->o_uid, oa->o_gid, 1);
+                if (rc2)
+                        CERROR("error filter adjust qunit! (rc:%d)\n", rc2);
+        }
+
         RETURN(rc);
 }
 
@@ -2289,7 +2359,7 @@ static int filter_sync(struct obd_export *exp, struct obdo *oa,
         if (!oa || !(oa->o_valid & OBD_MD_FLID)) {
                 rc = fsfilt_sync(exp->exp_obd, filter->fo_sb);
                 /* flush any remaining cancel messages out to the target */
-                ctxt = llog_get_context(exp->exp_obd, LLOG_UNLINK_REPL_CTXT);
+                ctxt = llog_get_context(exp->exp_obd, LLOG_MDS_OST_REPL_CTXT);
                 llog_sync(ctxt, exp);
                 RETURN(rc);
         }
@@ -2388,8 +2458,14 @@ static int filter_set_info(struct obd_export *exp, __u32 keylen,
         CWARN("%s: received MDS connection ("LPX64")\n",
               obd->obd_name, conn.cookie);
         memcpy(&obd->u.filter.fo_mdc_conn, &conn, sizeof(conn));
-        ctxt = llog_get_context(obd, LLOG_UNLINK_REPL_CTXT);
+
+        /* setup llog imports */
+        ctxt = llog_get_context(obd, LLOG_MDS_OST_REPL_CTXT);
         rc = llog_receptor_accept(ctxt, exp->exp_imp_reverse);
+        
+        /* setup the quota context import */
+        obd->u.filter.fo_quota_ctxt.lqc_import = exp->exp_imp_reverse;
+
         RETURN(rc);
 }
 
@@ -2461,7 +2537,68 @@ int filter_iocontrol(unsigned int cmd, struct obd_export *exp,
         RETURN(0);
 }
 
-static struct llog_operations filter_unlink_repl_logops;
+/* obd_ops quotacheck method for the filter: run fsfilt_quotacheck() on the
+ * filter's superblock inside the obd's run context.
+ *
+ * Returns the fsfilt_quotacheck() return code; a non-zero code is also
+ * logged with CERROR. */
+static int filter_quotacheck(struct obd_export *exp, struct obd_quotactl *oqctl)
+{
+        struct obd_run_ctxt saved;
+        struct obd_device *obd = exp->exp_obd;
+        int rc;
+        ENTRY;
+
+        push_ctxt(&saved, &obd->obd_ctxt, NULL);
+
+        rc = fsfilt_quotacheck(obd, obd->u.filter.fo_sb, oqctl);
+        if (rc)
+                CERROR("%s: fsfilt_quotacheck: %d\n", obd->obd_name, rc);
+
+        pop_ctxt(&saved, &obd->obd_ctxt, NULL);
+
+        RETURN(rc);
+}
+
+/* obd_ops quotactl method for the filter.
+ *
+ * Q_QUOTAON, Q_QUOTAOFF, Q_GETOINFO, Q_GETOQUOTA and Q_GETQUOTA are passed
+ * to fsfilt_quotactl() inside the obd's run context.  Q_INITQUOTA is
+ * issued to fsfilt_quotactl() as Q_SETQUOTA and, on success, followed by
+ * qctxt_adjust_qunit() for the affected uid or gid.  Any other command is
+ * logged and hits LBUG().
+ *
+ * Returns the return code of the last helper called. */
+static int filter_quotactl(struct obd_export *exp, struct obd_quotactl *oqctl)
+{
+        struct obd_device *obd = exp->exp_obd;
+        struct filter_obd *filter = &obd->u.filter;
+        struct obd_run_ctxt saved;
+        unsigned int uid = 0, gid = 0;
+        int rc = 0;
+        ENTRY;
+
+        if (oqctl->qc_cmd == Q_QUOTAON || oqctl->qc_cmd == Q_QUOTAOFF ||
+            oqctl->qc_cmd == Q_GETOINFO || oqctl->qc_cmd == Q_GETOQUOTA ||
+            oqctl->qc_cmd == Q_GETQUOTA) {
+                push_ctxt(&saved, &obd->obd_ctxt, NULL);
+                rc = fsfilt_quotactl(obd, filter->fo_sb, oqctl);
+                pop_ctxt(&saved, &obd->obd_ctxt, NULL);
+                RETURN(rc);
+        }
+
+        if (oqctl->qc_cmd != Q_INITQUOTA) {
+                CERROR("%s: unsupported filter_quotactl command: %d\n",
+                       obd->obd_name, oqctl->qc_cmd);
+                LBUG();
+                RETURN(rc);
+        }
+
+        /* Q_INITQUOTA: the request must set only the block limits, with
+         * the hard limit at MIN_QLIMIT and no soft limit */
+        LASSERT(oqctl->qc_dqblk.dqb_valid == QIF_BLIMITS);
+        LASSERT(oqctl->qc_dqblk.dqb_bhardlimit == MIN_QLIMIT);
+        LASSERT(oqctl->qc_dqblk.dqb_bsoftlimit == 0);
+
+        oqctl->qc_cmd = Q_SETQUOTA;
+        rc = fsfilt_quotactl(obd, filter->fo_sb, oqctl);
+        /* qc_cmd is sent back in the reply, so put the original back
+         * before any return */
+        oqctl->qc_cmd = Q_INITQUOTA;
+        if (rc)
+                RETURN(rc);
+
+        /* trigger qunit pre-acquire for the id that was just initialized */
+        if (oqctl->qc_type == USRQUOTA)
+                uid = oqctl->qc_id;
+        else
+                gid = oqctl->qc_id;
+
+        rc = qctxt_adjust_qunit(obd, &filter->fo_quota_ctxt, uid, gid, 1);
+        RETURN(rc);
+}
+
+static struct llog_operations filter_mds_ost_repl_logops;
 static struct llog_operations filter_size_orig_logops = {
         lop_setup: llog_obd_origin_setup,
         lop_cleanup: llog_obd_origin_cleanup,
@@ -2475,18 +2612,19 @@ static int filter_llog_init(struct obd_device *obd, struct obd_device *tgt,
         int rc;
         ENTRY;
 
-        filter_unlink_repl_logops = llog_client_ops;
-        filter_unlink_repl_logops.lop_cancel = llog_obd_repl_cancel;
-        filter_unlink_repl_logops.lop_connect = llog_repl_connect;
-        filter_unlink_repl_logops.lop_sync = llog_obd_repl_sync;
+        filter_mds_ost_repl_logops = llog_client_ops;
+        filter_mds_ost_repl_logops.lop_cancel = llog_obd_repl_cancel;
+        filter_mds_ost_repl_logops.lop_connect = llog_repl_connect;
+        filter_mds_ost_repl_logops.lop_sync = llog_obd_repl_sync;
 
-        rc = llog_setup(obd, LLOG_UNLINK_REPL_CTXT, tgt, 0, NULL,
-                        &filter_unlink_repl_logops);
+        rc = llog_setup(obd, LLOG_MDS_OST_REPL_CTXT, tgt, 0, NULL,
+                        &filter_mds_ost_repl_logops);
         if (rc)
                 RETURN(rc);
+
         /* FIXME - assign unlink_cb for filter's recovery */
-        ctxt = llog_get_context(obd, LLOG_UNLINK_REPL_CTXT);
-        ctxt->llog_proc_cb = filter_recov_log_unlink_cb;
+        ctxt = llog_get_context(obd, LLOG_MDS_OST_REPL_CTXT);
+        ctxt->llog_proc_cb = filter_recov_log_mds_ost_cb;
 
         rc = llog_setup(obd, LLOG_SIZE_ORIG_CTXT, tgt, 0, NULL,
                         &filter_size_orig_logops);
@@ -2499,7 +2637,7 @@ static int filter_llog_finish(struct obd_device *obd, int count)
         int rc = 0, rc2 = 0;
         ENTRY;
 
-        ctxt = llog_get_context(obd, LLOG_UNLINK_REPL_CTXT);
+        ctxt = llog_get_context(obd, LLOG_MDS_OST_REPL_CTXT);
         if (ctxt)
                 rc = llog_cleanup(ctxt);
 
@@ -2546,6 +2684,8 @@ static struct obd_ops filter_obd_ops = {
         .o_llog_init      = filter_llog_init,
         .o_llog_finish    = filter_llog_finish,
         .o_iocontrol      = filter_iocontrol,
+        .o_quotacheck     = filter_quotacheck,
+        .o_quotactl       = filter_quotactl,
 };
 
 static struct obd_ops filter_sanobd_ops = {
index da3d9ba..e90fb60 100644 (file)
@@ -160,7 +160,7 @@ int filter_log_sz_change(struct llog_handle *cathandle,
 //int filter_get_catalog(struct obd_device *);
 void filter_cancel_cookies_cb(struct obd_device *obd, __u64 transno,
                               void *cb_data, int error);
-int filter_recov_log_unlink_cb(struct llog_handle *llh,
+int filter_recov_log_mds_ost_cb(struct llog_handle *llh,
                                struct llog_rec_hdr *rec, void *data);
 
 /* filter_san.c */
index a1eb318..d60f811 100644 (file)
@@ -792,7 +792,8 @@ void filter_grant_commit(struct obd_export *exp, int niocount,
 
 int filter_commitrw(int cmd, struct obd_export *exp, struct obdo *oa,
                     int objcount, struct obd_ioobj *obj, int niocount,
-                    struct niobuf_local *res, struct obd_trans_info *oti,int rc)
+                    struct niobuf_local *res, struct obd_trans_info *oti,
+                    int rc)
 {
         if (cmd == OBD_BRW_WRITE)
                 return filter_commitrw_write(exp, oa, objcount, obj, niocount,
index 722e23d..e477545 100644 (file)
@@ -34,6 +34,7 @@
 
 #include <linux/obd_class.h>
 #include <linux/lustre_fsfilt.h>
+#include <linux/lustre_quota.h>
 #include "filter_internal.h"
 
 #warning "implement writeback mode -bzzz"
@@ -47,6 +48,7 @@ struct dio_request {
         int               dr_max_pages;
         int               dr_npages;
         int               dr_error;
+        unsigned long     dr_flag;     /* indicating if there is client cache page in this rpc */
         struct page     **dr_pages;
         unsigned long    *dr_blocks;
         spinlock_t        dr_lock;
@@ -365,6 +367,71 @@ static void filter_clear_page_cache(struct inode *inode, struct bio *iobuf)
 #endif
 }
 
+/* Build the credentials a write should run under when quota is enabled on
+ * the filter's superblock.
+ *
+ * When no quota is enabled, *ret_uc is left untouched and 0 is returned.
+ * Otherwise an obd_ucred is allocated, filled with fsuid/fsgid and the
+ * current task's effective capabilities, with CAP_SYS_RESOURCE raised for
+ * fsuid 0 and lowered for any other fsuid, and handed back through
+ * *ret_uc.  The caller owns the allocation.
+ *
+ * Returns 0 on success or -ENOMEM if the allocation fails. */
+static int filter_quota_enforcement(struct obd_device *obd,
+                                    unsigned int fsuid, unsigned int fsgid,
+                                    struct obd_ucred **ret_uc)
+{
+        struct obd_ucred *cred = NULL;
+        ENTRY;
+
+        if (!sb_any_quota_enabled(obd->u.filter.fo_sb))
+                RETURN(0);
+
+        OBD_ALLOC(cred, sizeof(*cred));
+        if (cred == NULL)
+                RETURN(-ENOMEM);
+
+        cred->ouc_fsuid = fsuid;
+        cred->ouc_fsgid = fsgid;
+        cred->ouc_cap = current->cap_effective;
+        if (fsuid == 0)
+                cap_raise(cred->ouc_cap, CAP_SYS_RESOURCE);
+        else
+                cap_lower(cred->ouc_cap, CAP_SYS_RESOURCE);
+
+        *ret_uc = cred;
+        RETURN(0);
+}
+
+/* Record in oa whether the object's uid and gid are over their block hard
+ * limits.
+ *
+ * Does nothing when no quota is enabled on the filter's superblock.
+ * Otherwise o_flags is reset to QUOTA_OK and, for each quota type, a
+ * Q_GETQUOTA is issued for oa->o_uid (user) or oa->o_gid (group).  On a
+ * successful query the matching OBD_MD_FL*QUOTA bit is set in o_valid,
+ * and the matching OBD_FL_NO_*QUOTA bit is set in o_flags when a non-zero
+ * hard limit is exceeded.
+ *
+ * Returns 0, or the first error reported by fsfilt_quotactl(); a failing
+ * type does not stop the remaining types from being queried. */
+static int filter_get_quota_flag(struct obd_device *obd,
+                                 struct obdo *oa)
+{
+        struct filter_obd *filter = &obd->u.filter;
+        int type;
+        int rc = 0;
+        ENTRY;
+
+        if (!sb_any_quota_enabled(filter->fo_sb))
+                RETURN(rc);
+
+        oa->o_flags = QUOTA_OK;
+
+        for (type = 0; type < MAXQUOTAS; type++) {
+                struct obd_quotactl oqctl;
+                int is_user = (type == USRQUOTA);
+                int err;
+
+                oqctl.qc_cmd = Q_GETQUOTA;
+                oqctl.qc_type = type;
+                oqctl.qc_id = is_user ? oa->o_uid : oa->o_gid;
+                err = fsfilt_quotactl(obd, filter->fo_sb, &oqctl);
+                if (err) {
+                        /* keep only the first failure */
+                        if (rc == 0)
+                                rc = err;
+                        continue;
+                }
+
+                /* the over-quota flag for this uid/gid is now valid */
+                oa->o_valid |= is_user ? OBD_MD_FLUSRQUOTA : OBD_MD_FLGRPQUOTA;
+
+                /* a zero hard limit is never treated as exceeded */
+                if (oqctl.qc_dqblk.dqb_bhardlimit == 0)
+                        continue;
+
+                if (toqb(oqctl.qc_dqblk.dqb_curspace) >
+                    oqctl.qc_dqblk.dqb_bhardlimit)
+                        oa->o_flags |= is_user ? OBD_FL_NO_USRQUOTA :
+                                                 OBD_FL_NO_GRPQUOTA;
+        }
+
+        RETURN(rc);
+}
+
 /* Must be called with i_sem taken for writes; this will drop it */
 int filter_direct_io(int rw, struct dentry *dchild, void *iobuf,
                      struct obd_export *exp, struct iattr *attr,
@@ -374,6 +441,7 @@ int filter_direct_io(int rw, struct dentry *dchild, void *iobuf,
         struct dio_request *dreq = iobuf;
         struct inode *inode = dchild->d_inode;
         int blocks_per_page = PAGE_SIZE >> inode->i_blkbits;
+        struct lustre_quota_ctxt *qctxt = &obd->u.filter.fo_quota_ctxt;
         int rc, rc2;
         ENTRY;
 
@@ -386,12 +454,33 @@ int filter_direct_io(int rw, struct dentry *dchild, void *iobuf,
         if (dreq->dr_npages == 0)
                 RETURN(0);
 
+        /* If there is any page in this write rpc that comes from client
+         * cache, we write the whole rpc without quota limit */
+        if (dreq->dr_flag & OBD_BRW_FROM_GRANT) {
+                cap_raise(current->cap_effective, CAP_SYS_RESOURCE);
+                dreq->dr_flag &= ~OBD_BRW_FROM_GRANT;
+        }
+        
+remap:
         rc = fsfilt_map_inode_pages(obd, inode,
                                     dreq->dr_pages, dreq->dr_npages,
                                     dreq->dr_blocks,
                                     obdfilter_created_scratchpad,
                                     rw == OBD_BRW_WRITE, NULL);
 
+        if (rc == -EDQUOT) {
+                LASSERT(rw == OBD_BRW_WRITE && 
+                        !cap_raised(current->cap_effective, CAP_SYS_RESOURCE));
+
+                /* Unfortunately, if quota master is too busy to handle the 
+                 * pre-dqacq in time or this user has exceeded quota limit, we 
+                 * have to wait for the completion of in flight dqacq/dqrel, 
+                 * then try again */
+                if (qctxt_wait_on_dqacq(obd, qctxt, inode->i_uid, 
+                                        inode->i_gid, 1) == -EAGAIN)
+                        goto remap;
+        }
+        
         if (rw == OBD_BRW_WRITE) {
                 if (rc == 0) {
                         filter_tally_write(&obd->u.filter,
@@ -415,7 +504,6 @@ int filter_direct_io(int rw, struct dentry *dchild, void *iobuf,
                         rc = rc2;
                 if (rc != 0)
                         RETURN(rc);
-
         }
 
         /* This is nearly osync_inode, without the waiting
@@ -474,6 +562,8 @@ int filter_commitrw_write(struct obd_export *exp, struct obdo *oa,
         unsigned long now = jiffies;
         int i, err, cleanup_phase = 0;
         struct obd_device *obd = exp->exp_obd;
+        struct filter_obd *filter = &obd->u.filter;
+        struct obd_ucred *uc = NULL;
         int   total_size = 0;
         ENTRY;
 
@@ -517,9 +607,20 @@ int filter_commitrw_write(struct obd_export *exp, struct obdo *oa,
                 this_size = lnb->offset + lnb->len;
                 if (this_size > iattr.ia_size)
                         iattr.ia_size = this_size;
+                /* if one page is a write-back page from client cache,
+                 * then mark that the whole io request can be over quota */
+                if (lnb->flags & OBD_BRW_FROM_GRANT)
+                        dreq->dr_flag |= OBD_BRW_FROM_GRANT;
         }
 
-        push_ctxt(&saved, &obd->obd_ctxt, NULL);
+        /* The client stores the user's credential information (fsuid and
+         * fsgid) in oa->o_uid and oa->o_gid. When quota is enabled, we use
+         * them to build the obd_ucred so as to enforce the OSS quota check */
+        rc = filter_quota_enforcement(obd, oa->o_uid, oa->o_gid, &uc);
+        if (rc)
+                GOTO(cleanup, rc);
+
+        push_ctxt(&saved, &obd->obd_ctxt, uc);
         cleanup_phase = 2;
 
         down(&inode->i_sem);
@@ -542,7 +643,12 @@ int filter_commitrw_write(struct obd_export *exp, struct obdo *oa,
         rc = filter_direct_io(OBD_BRW_WRITE, res->dentry, dreq, exp, &iattr,
                               oti, NULL);
         if (rc == 0)
-                obdo_from_inode(oa, inode, FILTER_VALID_FLAGS);
+                obdo_from_inode(oa, inode, 
+                                FILTER_VALID_FLAGS | OBD_MD_FLUID | OBD_MD_FLGID);
+        else 
+                obdo_from_inode(oa, inode, OBD_MD_FLUID | OBD_MD_FLGID);
+
+        filter_get_quota_flag(obd, oa);
 
         fsfilt_check_slow(now, obd_timeout, "direct_io");
 
@@ -560,7 +666,9 @@ cleanup:
 
         switch (cleanup_phase) {
         case 2:
-                pop_ctxt(&saved, &obd->obd_ctxt, NULL);
+                pop_ctxt(&saved, &obd->obd_ctxt, uc);
+                if (uc)
+                        OBD_FREE(uc, sizeof(*uc));
                 LASSERT(current->journal_info == NULL);
         case 1:
                 filter_free_iobuf(dreq);
@@ -569,5 +677,12 @@ cleanup:
                 f_dput(res->dentry);
         }
 
+        /* trigger quota pre-acquire */
+        if (rc == 0) {
+                err = qctxt_adjust_qunit(obd, &filter->fo_quota_ctxt, 
+                                         oa->o_uid, oa->o_gid, 1);
+                if (err)
+                        CERROR("error filter ajust qunit! (rc:%d)\n", err);
+        }
         RETURN(rc);
 }
index d8fe7c8..34ad008 100644 (file)
@@ -115,36 +115,116 @@ void filter_cancel_cookies_cb(struct obd_device *obd, __u64 transno,
 /* Callback for processing the unlink log record received from MDS by 
  * llog_client_api.
  */
-int filter_recov_log_unlink_cb(struct llog_handle *llh, 
-                               struct llog_rec_hdr *rec, void *data)
+static int filter_recov_log_unlink_cb(struct llog_ctxt *ctxt,
+                                      struct llog_rec_hdr *rec,
+                                      struct llog_cookie *cookie)
 {
-        struct llog_ctxt *ctxt = llh->lgh_ctxt;
         struct obd_device *obd = ctxt->loc_obd;
         struct obd_export *exp = obd->obd_self_export;
-        struct llog_cookie cookie;
-        struct llog_gen_rec *lgr;
         struct llog_unlink_rec *lur;
         struct obdo *oa;
         obd_id oid;
         int rc = 0;
         ENTRY;
 
+        /* Build an obdo naming the object from the unlink record and
+         * carrying the caller's log cookie, then destroy that object
+         * through the device's own export. */
+        lur = (struct llog_unlink_rec *)rec;
+        oa = obdo_alloc();
+        if (oa == NULL) 
+                RETURN(-ENOMEM);
+        oa->o_valid |= OBD_MD_FLCOOKIE;
+        oa->o_id = lur->lur_oid;
+        oa->o_gr = lur->lur_ogen;
+        memcpy(obdo_logcookie(oa), cookie, sizeof(*cookie));
+        /* remember the id for the debug message; oa is freed before then */
+        oid = oa->o_id;
+
+        rc = obd_destroy(exp, oa, NULL, NULL);
+        obdo_free(oa);
+        /* -ENOENT is not an error here: the object is already gone, so
+         * cancel the log record ourselves and report success */
+        if (rc == -ENOENT) {
+                CDEBUG(D_HA, "object already removed, send cookie\n");
+                llog_cancel(ctxt, NULL, 1, cookie, 0);
+                RETURN(0);
+        }
+
+        if (rc == 0)
+                CDEBUG(D_HA, "object: "LPU64" in record is destroyed\n", oid);
+
+        RETURN(rc);
+}
+
+/* Callback for processing the setattr log record received from MDS by
+ * llog_client_api.
+ *
+ * Builds an obdo carrying the object id/group and the uid/gid taken from
+ * the record, plus the log cookie, and applies it with obd_setattr() on
+ * the device's self export.
+ *
+ * Returns 0 on success or when the object no longer exists (the cookie is
+ * cancelled here in that case), -ENOMEM if the obdo cannot be allocated,
+ * or the error returned by obd_setattr().
+ */
+static int filter_recov_log_setattr_cb(struct llog_ctxt *ctxt,
+                                       struct llog_rec_hdr *rec,
+                                       struct llog_cookie *cookie)
+{
+        struct obd_device *obd = ctxt->loc_obd;
+        struct obd_export *exp = obd->obd_self_export;
+        struct llog_setattr_rec *lsr;
+        struct obdo *oa;
+        obd_id oid;
+        int rc = 0;
+        ENTRY;
+
+        lsr = (struct llog_setattr_rec *)rec;
+        oa = obdo_alloc();
+        /* obdo_alloc() can fail; bail out (as the unlink callback does)
+         * instead of dereferencing a NULL obdo below */
+        if (oa == NULL)
+                RETURN(-ENOMEM);
+
+        oa->o_valid |= (OBD_MD_FLID | OBD_MD_FLUID | OBD_MD_FLGID |
+                        OBD_MD_FLCOOKIE);
+        oa->o_id = lsr->lsr_oid;
+        oa->o_gr = lsr->lsr_ogen;
+        oa->o_uid = lsr->lsr_uid;
+        oa->o_gid = lsr->lsr_gid;
+        memcpy(obdo_logcookie(oa), cookie, sizeof(*cookie));
+        /* remember the id for the debug message; oa is freed before then */
+        oid = oa->o_id;
+
+        rc = obd_setattr(exp, oa, NULL, NULL);
+        obdo_free(oa);
+
+        /* the object is already gone: nothing to chown, just cancel the
+         * log record and report success */
+        if (rc == -ENOENT) {
+                CDEBUG(D_HA, "object already removed, send cookie\n");
+                llog_cancel(ctxt, NULL, 1, cookie, 0);
+                RETURN(0);
+        }
+        if (rc == 0)
+                CDEBUG(D_HA, "object: "LPU64" in record is chown/chgrp\n", oid);
+
+        RETURN(rc);
+}
+
+int filter_recov_log_mds_ost_cb(struct llog_handle *llh,
+                               struct llog_rec_hdr *rec, void *data)
+{
+        struct llog_ctxt *ctxt = llh->lgh_ctxt;
+        struct llog_cookie cookie;
+        int rc = 0;
+        ENTRY;
+
         if (!(llh->lgh_hdr->llh_flags & LLOG_F_IS_PLAIN)) {
                 CERROR("log is not plain\n");
                 RETURN(-EINVAL);
         }
         if (rec->lrh_type != MDS_UNLINK_REC &&
+            rec->lrh_type != MDS_SETATTR_REC &&
             rec->lrh_type != LLOG_GEN_REC) {
                 CERROR("log record type error\n");
                 RETURN(-EINVAL);
         }
+
         cookie.lgc_lgl = llh->lgh_id;
-        cookie.lgc_subsys = LLOG_UNLINK_ORIG_CTXT;
+        cookie.lgc_subsys = LLOG_MDS_OST_ORIG_CTXT;
         cookie.lgc_index = rec->lrh_index;
 
-        if (rec->lrh_type == LLOG_GEN_REC) {
-                lgr = (struct llog_gen_rec *)rec;
+        switch (rec->lrh_type) {
+        case MDS_UNLINK_REC:
+                rc = filter_recov_log_unlink_cb(ctxt, rec, &cookie);
+                break;
+        case MDS_SETATTR_REC:
+                rc = filter_recov_log_setattr_cb(ctxt, rec, &cookie);
+                break;
+        case LLOG_GEN_REC: {
+                struct llog_gen_rec *lgr = (struct llog_gen_rec *)rec;
                 if (llog_gen_lt(lgr->lgr_gen, ctxt->loc_gen))
                         rc = 0;
                 else
@@ -152,28 +232,10 @@ int filter_recov_log_unlink_cb(struct llog_handle *llh,
                 CWARN("fetch generation log, send cookie\n");
                 llog_cancel(ctxt, NULL, 1, &cookie, 0);
                 RETURN(rc);
+                }
+        default:
+                break;
         }
 
-        lur = (struct llog_unlink_rec *)rec;
-        oa = obdo_alloc();
-        if (oa == NULL) 
-                RETURN(-ENOMEM);
-        oa->o_valid |= OBD_MD_FLCOOKIE;
-        oa->o_id = lur->lur_oid;
-        oa->o_gr = lur->lur_ogen;
-        memcpy(obdo_logcookie(oa), &cookie, sizeof(cookie));
-        oid = oa->o_id;
-
-        rc = obd_destroy(exp, oa, NULL, NULL);
-        obdo_free(oa);
-        if (rc == -ENOENT) {
-                CDEBUG(D_HA, "object already removed, send cookie\n");
-                llog_cancel(ctxt, NULL, 1, &cookie, 0);
-                RETURN(0);
-        }
-
-        if (rc == 0)
-                CDEBUG(D_HA, "object: "LPU64" in record is destroyed\n", oid);
-
         RETURN(rc);
 }
index 2fc8420..61d0fb8 100644 (file)
@@ -121,6 +121,121 @@ int lprocfs_filter_wr_readcache(struct file *file, const char *buffer,
         return count;
 }
 
+/* /proc read handler: format this filter's block quota unit size
+ * (lqc_bunit_sz) into @page as one decimal line; returns snprintf()'s
+ * result. */
+static int lprocfs_filter_rd_bunit(char *page, char **start, off_t off,
+                                   int count, int *eof, void *data)
+{
+        struct obd_device *obd = data;
+        struct filter_obd *filter;
+        int len;
+
+        LASSERT(obd != NULL);
+        filter = &obd->u.filter;
+
+        len = snprintf(page, count, "%lu\n",
+                       filter->fo_quota_ctxt.lqc_bunit_sz);
+        return len;
+}
+
+/* /proc read handler: format this filter's inode quota unit size
+ * (lqc_iunit_sz) into @page as one decimal line; returns snprintf()'s
+ * result. */
+static int lprocfs_filter_rd_iunit(char *page, char **start, off_t off,
+                                   int count, int *eof, void *data)
+{
+        struct obd_device *obd = data;
+        struct filter_obd *filter;
+        int len;
+
+        LASSERT(obd != NULL);
+        filter = &obd->u.filter;
+
+        len = snprintf(page, count, "%lu\n",
+                       filter->fo_quota_ctxt.lqc_iunit_sz);
+        return len;
+}
+
+/* /proc write handler for the block quota unit size (lqc_bunit_sz).
+ *
+ * The value parsed by lprocfs_write_helper() is accepted only if it is
+ * positive, a multiple of QUOTABLOCK_SIZE and strictly greater than the
+ * current lqc_btune_sz.
+ *
+ * Returns @count on success, the helper's error code if parsing fails,
+ * or -EINVAL if the value is rejected.
+ */
+static int lprocfs_filter_wr_bunit(struct file *file, const char *buffer,
+                                   unsigned long count, void *data)
+{
+        struct obd_device *obd = (struct obd_device *)data;
+        int val, rc = 0;
+        LASSERT(obd != NULL);
+
+        rc = lprocfs_write_helper(buffer, count, &val);
+        if (rc)
+                return rc;
+
+        /* Reject non-positive input explicitly: the sizes are printed
+         * with %lu, so they appear to be unsigned long, and a negative
+         * int would be converted to a huge value and slip past the
+         * "greater than btune" comparison below. */
+        if (val <= 0 || val % QUOTABLOCK_SIZE ||
+            val <= obd->u.filter.fo_quota_ctxt.lqc_btune_sz)
+                return -EINVAL;
+
+        obd->u.filter.fo_quota_ctxt.lqc_bunit_sz = val;
+        return count;
+}
+
+/* /proc write handler for the inode quota unit size (lqc_iunit_sz).
+ *
+ * The value parsed by lprocfs_write_helper() is accepted only if it is
+ * positive and strictly greater than the current lqc_itune_sz.
+ *
+ * Returns @count on success, the helper's error code if parsing fails,
+ * or -EINVAL if the value is rejected.
+ */
+static int lprocfs_filter_wr_iunit(struct file *file, const char *buffer,
+                                   unsigned long count, void *data)
+{
+        struct obd_device *obd = (struct obd_device *)data;
+        int val, rc = 0;
+        LASSERT(obd != NULL);
+
+        rc = lprocfs_write_helper(buffer, count, &val);
+        if (rc)
+                return rc;
+
+        /* Reject non-positive input explicitly: the sizes are printed
+         * with %lu, so they appear to be unsigned long, and a negative
+         * int would be converted to a huge value and slip past the
+         * "greater than itune" comparison below. */
+        if (val <= 0 ||
+            val <= obd->u.filter.fo_quota_ctxt.lqc_itune_sz)
+                return -EINVAL;
+
+        obd->u.filter.fo_quota_ctxt.lqc_iunit_sz = val;
+        return count;
+}
+
+/* /proc read handler: format this filter's block quota tune size
+ * (lqc_btune_sz) into @page as one decimal line; returns snprintf()'s
+ * result. */
+static int lprocfs_filter_rd_btune(char *page, char **start, off_t off,
+                                   int count, int *eof, void *data)
+{
+        struct obd_device *obd = data;
+        struct filter_obd *filter;
+        int len;
+
+        LASSERT(obd != NULL);
+        filter = &obd->u.filter;
+
+        len = snprintf(page, count, "%lu\n",
+                       filter->fo_quota_ctxt.lqc_btune_sz);
+        return len;
+}
+
+/* /proc read handler: format this filter's inode quota tune size
+ * (lqc_itune_sz) into @page as one decimal line; returns snprintf()'s
+ * result. */
+static int lprocfs_filter_rd_itune(char *page, char **start, off_t off,
+                                   int count, int *eof, void *data)
+{
+        struct obd_device *obd = data;
+        struct filter_obd *filter;
+        int len;
+
+        LASSERT(obd != NULL);
+        filter = &obd->u.filter;
+
+        len = snprintf(page, count, "%lu\n",
+                       filter->fo_quota_ctxt.lqc_itune_sz);
+        return len;
+}
+
+/* /proc write handler for the block quota tune size (lqc_btune_sz).
+ * The parsed value must be above QUOTABLOCK_SIZE * MIN_QLIMIT, be a
+ * multiple of QUOTABLOCK_SIZE and stay below the current lqc_bunit_sz;
+ * anything else yields -EINVAL.  Returns @count on success. */
+static int lprocfs_filter_wr_btune(struct file *file, const char *buffer,
+                                   unsigned long count, void *data)
+{
+        struct obd_device *obd = data;
+        struct filter_obd *filter;
+        int val;
+        int rc;
+
+        LASSERT(obd != NULL);
+        filter = &obd->u.filter;
+
+        rc = lprocfs_write_helper(buffer, count, &val);
+        if (rc != 0)
+                return rc;
+
+        /* too small to be a usable tune size */
+        if (val <= QUOTABLOCK_SIZE * MIN_QLIMIT)
+                return -EINVAL;
+        /* must be a whole number of quota blocks */
+        if (val % QUOTABLOCK_SIZE)
+                return -EINVAL;
+        /* tune size has to stay below the unit size */
+        if (val >= filter->fo_quota_ctxt.lqc_bunit_sz)
+                return -EINVAL;
+
+        filter->fo_quota_ctxt.lqc_btune_sz = val;
+        return count;
+}
+
+/* /proc write handler for the inode quota tune size (lqc_itune_sz).
+ * The parsed value must be above MIN_QLIMIT and below the current
+ * lqc_iunit_sz; anything else yields -EINVAL.  Returns @count on
+ * success. */
+static int lprocfs_filter_wr_itune(struct file *file, const char *buffer,
+                                   unsigned long count, void *data)
+{
+        struct obd_device *obd = data;
+        struct filter_obd *filter;
+        int val;
+        int rc;
+
+        LASSERT(obd != NULL);
+        filter = &obd->u.filter;
+
+        rc = lprocfs_write_helper(buffer, count, &val);
+        if (rc != 0)
+                return rc;
+
+        /* too small to be a usable tune size */
+        if (val <= MIN_QLIMIT)
+                return -EINVAL;
+        /* tune size has to stay below the unit size */
+        if (val >= filter->fo_quota_ctxt.lqc_iunit_sz)
+                return -EINVAL;
+
+        filter->fo_quota_ctxt.lqc_itune_sz = val;
+        return count;
+}
+
 static struct lprocfs_vars lprocfs_obd_vars[] = {
         { "uuid",         lprocfs_rd_uuid,          0, 0 },
         { "blocksize",    lprocfs_rd_blksize,       0, 0 },
@@ -142,6 +257,15 @@ static struct lprocfs_vars lprocfs_obd_vars[] = {
         { "readcache_max_filesize",
                           lprocfs_filter_rd_readcache,
                           lprocfs_filter_wr_readcache, 0 },
+        { "quota_bunit_sz", lprocfs_filter_rd_bunit, 
+                            lprocfs_filter_wr_bunit, 0},
+        { "quota_btune_sz", lprocfs_filter_rd_btune,
+                            lprocfs_filter_wr_btune, 0},
+        { "quota_iunit_sz", lprocfs_filter_rd_iunit, 
+                            lprocfs_filter_wr_iunit, 0},
+        { "quota_itune_sz", lprocfs_filter_rd_itune,
+                            lprocfs_filter_wr_itune, 0},
+
         { 0 }
 };
 
index 568a725..52fece6 100644 (file)
@@ -1,4 +1,4 @@
 MODULES := osc
-osc-objs := osc_request.o lproc_osc.o osc_lib.o osc_create.o
+osc-objs := osc_request.o lproc_osc.o osc_lib.o osc_create.o osc_quota.o
 
 @INCLUDE_RULES@
index 20851a0..8fe19d9 100644 (file)
@@ -52,6 +52,14 @@ int osc_real_create(struct obd_export *exp, struct obdo *oa,
               struct lov_stripe_md **ea, struct obd_trans_info *oti);
 void oscc_init(struct obd_device *obd);
 void osc_wake_cache_waiters(struct client_obd *cli);
+int osc_get_quota_flag(struct client_obd *cli, unsigned int uid,
+                       unsigned int gid);
+int osc_set_quota_flag(struct client_obd *cli,
+                       unsigned int uid, unsigned int gid,
+                       obd_flag valid, obd_flag flags);
+int osc_qinfo_cleanup(struct client_obd *cli);
+int osc_qinfo_init(void);
+void osc_qinfo_exit(void);
 
 #ifdef __KERNEL__
 int lproc_osc_attach_seqstat(struct obd_device *dev);
diff --git a/lustre/osc/osc_quota.c b/lustre/osc/osc_quota.c
new file mode 100644 (file)
index 0000000..e84027b
--- /dev/null
@@ -0,0 +1,244 @@
+/* -*- mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*-
+ * vim:expandtab:shiftwidth=8:tabstop=8:
+ *
+ *  Copyright (c) 2003 Cluster File Systems, Inc.
+ *
+ *   This file is part of Lustre, http://www.lustre.org.
+ *
+ *   Lustre is free software; you can redistribute it and/or
+ *   modify it under the terms of version 2 of the GNU General Public
+ *   License as published by the Free Software Foundation.
+ *
+ *   Lustre is distributed in the hope that it will be useful,
+ *   but WITHOUT ANY WARRANTY; without even the implied warranty of
+ *   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ *   GNU General Public License for more details.
+ *
+ *   You should have received a copy of the GNU General Public License
+ *   along with Lustre; if not, write to the Free Software
+ *   Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
+ */
+
+#ifndef EXPORT_SYMTAB
+# define EXPORT_SYMTAB
+#endif
+#define DEBUG_SUBSYSTEM S_OSC
+                                                                                                                             
+#ifdef __KERNEL__
+# include <linux/module.h>
+# include <linux/obd.h>
+# include <linux/obd_ost.h>
+# include <linux/lustre_net.h>
+# include <linux/lustre_dlm.h>
+# include <linux/lustre_lib.h>
+# include <linux/lustre_compat25.h>
+# if (LINUX_VERSION_CODE >= KERNEL_VERSION(2,5,0))
+#  include <linux/workqueue.h>
+#  include <linux/smp_lock.h>
+# else
+#  include <linux/locks.h>
+# endif
+#endif
+
+#include "osc_internal.h"
+
+/* One cached "over quota" record on the client.  Entries are keyed by
+ * (oqi_cli, oqi_id, oqi_type) and linked into the qinfo_hash table; the
+ * lookup in osc_get_quota_flag() treats the mere presence of an entry as
+ * NO_QUOTA for that id. */
+struct osc_quota_info {
+        struct list_head        oqi_hash;       /* linkage in a qinfo_hash bucket */
+        struct client_obd      *oqi_cli;        /* osc obd this entry belongs to */ 
+        unsigned int            oqi_id;         /* uid/gid of a file */
+        short                   oqi_type;       /* quota type (compared against USRQUOTA) */
+        unsigned long           oqi_flag;       /* flag, set to NO_QUOTA on insert */
+};
+
+/* Taken around every lookup, insert and removal on qinfo_hash in this
+ * file. */
+spinlock_t qinfo_list_lock = SPIN_LOCK_UNLOCKED;
+
+/* Hash buckets of osc_quota_info entries, indexed by hashfn(); the list
+ * heads are initialised in osc_qinfo_init(). */
+static struct list_head qinfo_hash[NR_DQHASH];
+/* SLAB cache for client quota context */
+kmem_cache_t *qinfo_cachep = NULL;
+
+/* Map a (client, id, quota type) triple to a bucket index in qinfo_hash:
+ * mix the client pointer (shifted right by 6) with the id, scale by
+ * (MAXQUOTAS - type) and reduce modulo NR_DQHASH. */
+static inline int const hashfn(struct client_obd *cli,
+                               unsigned long id,
+                               int type)
+{
+        unsigned long mixed = ((unsigned long)cli >> 6) ^ id;
+
+        return (mixed * (MAXQUOTAS - type)) % NR_DQHASH;
+}
+
+/* Link @oqi at the head of the qinfo_hash bucket selected by its
+ * (client, id, type) key. */
+static inline void insert_qinfo_hash(struct osc_quota_info *oqi)
+{
+        int bucket = hashfn(oqi->oqi_cli, oqi->oqi_id, oqi->oqi_type);
+
+        list_add(&oqi->oqi_hash, &qinfo_hash[bucket]);
+}
+
+/* Unlink @oqi from its hash bucket and re-initialise its list linkage. */
+static inline void remove_qinfo_hash(struct osc_quota_info *oqi)
+{
+        struct list_head *link = &oqi->oqi_hash;
+
+        list_del_init(link);
+}
+
+/* Look up the cached quota entry for (@cli, @id, @type).
+ *
+ * Walks the qinfo_hash bucket chosen by hashfn() and returns the first
+ * entry whose client, id and type all match, or NULL if there is none.
+ * Callers in this file hold qinfo_list_lock around the call.
+ */
+static inline struct osc_quota_info *find_qinfo(struct client_obd *cli,
+                                                unsigned int id, int type)
+{
+        unsigned int hashent = hashfn(cli, id, type);
+        struct list_head *head;
+        struct osc_quota_info *oqi;
+
+        for (head = qinfo_hash[hashent].next;
+             head != qinfo_hash+hashent; head = head->next) {
+                oqi = list_entry(head, struct osc_quota_info, oqi_hash);
+                /* every hashed entry is expected to carry NO_QUOTA; this
+                 * must be a comparison -- an assignment here would
+                 * overwrite the flag and make the check meaningless */
+                LASSERT(oqi->oqi_flag == NO_QUOTA);
+                if (oqi->oqi_cli == cli &&
+                    oqi->oqi_id == id && oqi->oqi_type == type)
+                        return oqi;
+        }
+        return NULL;
+}
+
+/* Allocate an osc_quota_info from qinfo_cachep (SLAB_KERNEL) and fill in
+ * its key fields.  The entry is not hashed and oqi_flag is not set here.
+ * Returns the new entry, or NULL if the allocation fails. */
+static struct osc_quota_info *alloc_qinfo(struct client_obd *cli,
+                                          unsigned int id, int type)
+{
+        struct osc_quota_info *entry;
+        ENTRY;
+
+        OBD_SLAB_ALLOC(entry, qinfo_cachep, SLAB_KERNEL, sizeof(*entry));
+        if (entry == NULL)
+                RETURN(NULL);
+
+        /* key fields first, then an empty (self-linked) hash linkage */
+        entry->oqi_cli = cli;
+        entry->oqi_id = id;
+        entry->oqi_type = type;
+        INIT_LIST_HEAD(&entry->oqi_hash);
+
+        RETURN(entry);
+}
+                                                                                                                             
+/* Give @oqi back to the qinfo_cachep slab cache.  Does not unhash it. */
+static void free_qinfo(struct osc_quota_info *oqi)
+{
+        OBD_SLAB_FREE(oqi, qinfo_cachep, sizeof(struct osc_quota_info));
+}
+
+/* Report whether @uid or @gid is cached as over quota for @cli.
+ * Each quota type is probed in turn (uid for USRQUOTA, gid otherwise);
+ * returns NO_QUOTA as soon as an entry is found, QUOTA_OK if none is. */
+int osc_get_quota_flag(struct client_obd *cli,
+                       unsigned int uid, unsigned int gid)
+{
+        int type;
+        int result = QUOTA_OK;
+        ENTRY;
+
+        spin_lock(&qinfo_list_lock);
+        for (type = 0; type < MAXQUOTAS; type++) {
+                unsigned int id = (type == USRQUOTA) ? uid : gid;
+
+                if (find_qinfo(cli, id, type) != NULL) {
+                        result = NO_QUOTA;
+                        break;
+                }
+        }
+        spin_unlock(&qinfo_list_lock);
+
+        RETURN(result);
+}
+
+/* Update the cached over-quota state of @uid/@gid for @cli.
+ *
+ * For each quota type whose OBD_MD_FL{USR,GRP}QUOTA bit is set in @valid,
+ * the matching OBD_FL_NO_{USR,GRP}QUOTA bit in @flags decides the new
+ * state: if it is set and no entry is cached, one is inserted; if it is
+ * clear and an entry is cached, that entry is removed and freed.
+ *
+ * Returns 0 on success or -ENOMEM if an entry cannot be allocated.
+ */
+int osc_set_quota_flag(struct client_obd *cli,
+                       unsigned int uid, unsigned int gid,
+                       obd_flag valid, obd_flag flags)
+{
+        unsigned int id;
+        obd_flag noquota;
+        int cnt, rc = 0;
+        ENTRY;
+
+        for (cnt = 0; cnt < MAXQUOTAS; cnt++) {
+                struct osc_quota_info *oqi = NULL, *old;
+
+                if (!(valid & ((cnt == USRQUOTA) ?
+                    OBD_MD_FLUSRQUOTA : OBD_MD_FLGRPQUOTA)))
+                        continue;
+
+                id = (cnt == USRQUOTA) ? uid : gid;
+                noquota = (cnt == USRQUOTA) ?
+                    (flags & OBD_FL_NO_USRQUOTA) : (flags & OBD_FL_NO_GRPQUOTA);
+
+                /* alloc_qinfo() allocates with SLAB_KERNEL and may sleep,
+                 * so it must not run under the spinlock: allocate the
+                 * candidate entry up front and only link it in below */
+                if (noquota) {
+                        oqi = alloc_qinfo(cli, id, cnt);
+                        if (!oqi) {
+                                CERROR("not enough mem!\n");
+                                rc = -ENOMEM;
+                                break;
+                        }
+                        oqi->oqi_flag = NO_QUOTA;
+                }
+
+                spin_lock(&qinfo_list_lock);
+                old = find_qinfo(cli, id, cnt);
+                if (old && !noquota) {
+                        /* back under quota: drop the cached entry */
+                        remove_qinfo_hash(old);
+                } else if (!old && noquota) {
+                        /* newly over quota: the hash now owns oqi */
+                        insert_qinfo_hash(oqi);
+                        oqi = NULL;
+                }
+                spin_unlock(&qinfo_list_lock);
+
+                /* release whatever is no longer referenced: the entry
+                 * just unhashed, or a pre-allocated entry that turned out
+                 * to be unnecessary because one was already cached */
+                if (old && !noquota)
+                        free_qinfo(old);
+                if (oqi)
+                        free_qinfo(oqi);
+        }
+
+        RETURN(rc);
+}
+
+/* Drop every cached quota entry that belongs to @cli: scan all hash
+ * buckets under qinfo_list_lock, unhashing and freeing the matches.
+ * Always returns 0. */
+int osc_qinfo_cleanup(struct client_obd *cli)
+{
+        struct osc_quota_info *entry, *next;
+        int bucket;
+        ENTRY;
+
+        spin_lock(&qinfo_list_lock);
+        for (bucket = 0; bucket < NR_DQHASH; bucket++) {
+                list_for_each_entry_safe(entry, next, qinfo_hash + bucket,
+                                         oqi_hash) {
+                        /* entries of other clients stay cached */
+                        if (entry->oqi_cli == cli) {
+                                remove_qinfo_hash(entry);
+                                free_qinfo(entry);
+                        }
+                }
+        }
+        spin_unlock(&qinfo_list_lock);
+
+        RETURN(0);
+}
+
+/* Set up the client quota cache: create the "osc_quota_info" slab cache
+ * and initialise every qinfo_hash bucket as an empty list.
+ * Returns 0 on success or -ENOMEM if the slab cache cannot be created. */
+int osc_qinfo_init(void)
+{
+        int bucket;
+        ENTRY;
+
+        /* must not be initialised twice */
+        LASSERT(qinfo_cachep == NULL);
+        qinfo_cachep = kmem_cache_create("osc_quota_info",
+                                         sizeof(struct osc_quota_info),
+                                         0, 0, NULL, NULL);
+        if (qinfo_cachep == NULL)
+                RETURN(-ENOMEM);
+
+        for (bucket = 0; bucket < NR_DQHASH; bucket++)
+                INIT_LIST_HEAD(&qinfo_hash[bucket]);
+
+        RETURN(0);
+}
+
+/* Tear down the client quota cache: unhash and free every remaining
+ * entry under qinfo_list_lock, then destroy the slab cache.  Asserts
+ * that the slab cache could be destroyed. */
+void osc_qinfo_exit(void)
+{
+        struct osc_quota_info *oqi, *n;
+        int i, rc;
+        ENTRY;
+
+        spin_lock(&qinfo_list_lock);
+        for (i = 0; i < NR_DQHASH; i++) {
+                list_for_each_entry_safe(oqi, n, &qinfo_hash[i], oqi_hash) {
+                        remove_qinfo_hash(oqi);
+                        free_qinfo(oqi);
+                }
+        }
+        spin_unlock(&qinfo_list_lock);
+
+        /* Destroy the cache outside the assertion: a call buried in the
+         * LASSERTF() argument would silently disappear in a build where
+         * assertions expand to nothing. */
+        rc = kmem_cache_destroy(qinfo_cachep);
+        LASSERTF(rc == 0, "couldn't destroy osc quota info slab\n");
+}
index c6f2a10..95cd361 100644 (file)
@@ -278,6 +278,35 @@ out:
         RETURN(0);
 }
 
+/* Send an OST_SETATTR request for @oa without waiting for the reply.
+ *
+ * If @oa has OBD_MD_FLCOOKIE set, the first log cookie from @oti is
+ * copied into @oa before @oa is copied into the request body.  The
+ * request is then handed to ptlrpcd_add_req().  @md is not used and
+ * @oti must be non-NULL.
+ *
+ * Returns 0 once the request is queued, or -ENOMEM if it cannot be
+ * allocated.
+ */
+static int osc_setattr_async(struct obd_export *exp, struct obdo *oa,
+                       struct lov_stripe_md *md, struct obd_trans_info *oti)
+{
+        struct ptlrpc_request *req;
+        struct ost_body *body;
+        int bufsz = sizeof(*body);
+        ENTRY;
+
+        LASSERT(oti);
+
+        req = ptlrpc_prep_req(class_exp2cliimp(exp), OST_SETATTR, 1,
+                              &bufsz, NULL);
+        if (req == NULL)
+                RETURN(-ENOMEM);
+
+        body = lustre_msg_buf(req->rq_reqmsg, 0, sizeof(*body));
+
+        /* the cookie goes into oa first so that it is part of what gets
+         * copied into the request body */
+        if (oa->o_valid & OBD_MD_FLCOOKIE) {
+                memcpy(obdo_logcookie(oa), oti->oti_logcookies,
+                       sizeof(*oti->oti_logcookies));
+        }
+        memcpy(&body->oa, oa, sizeof(*oa));
+
+        req->rq_replen = lustre_msg_size(1, &bufsz);
+
+        /* do the MDS-to-OST setattr asynchronously */
+        ptlrpcd_add_req(req);
+
+        RETURN(0);
+}
+
 int osc_real_create(struct obd_export *exp, struct obdo *oa,
                     struct lov_stripe_md **ea, struct obd_trans_info *oti)
 {
@@ -848,6 +877,12 @@ static int osc_brw_fini_request(struct ptlrpc_request *req, struct obdo *oa,
                 RETURN(-EPROTO);
         }
 
+        /* set/clear over quota flag for a uid/gid */
+        if (req->rq_reqmsg->opc == OST_WRITE &&
+            body->oa.o_valid & (OBD_MD_FLUSRQUOTA | OBD_MD_FLGRPQUOTA))
+                osc_set_quota_flag(cli, body->oa.o_uid, body->oa.o_gid, 
+                                   body->oa.o_valid, body->oa.o_flags);
+
         osc_update_grant(cli, body);
         memcpy(oa, &body->oa, sizeof(*oa));
 
@@ -1343,6 +1378,18 @@ static struct ptlrpc_request *osc_build_req(struct client_obd *cli,
                 GOTO(out, req = ERR_PTR(rc));
         }
 
+        /* To enforce quota on the OSS, we need to pass the client's user
+         * credentials to the OST. We chose to store the fsuid and fsgid
+         * in oa->o_uid and oa->o_gid since those two fields are not used
+         * at present. We also take one page's user credentials as the
+         * credentials for the whole RPC. FIXME */
+        if (cmd == OBD_BRW_WRITE) {
+                struct obd_ucred ouc;
+                ops->ap_get_ucred(caller_data, &ouc);
+                oa->o_uid = ouc.ouc_fsuid;
+                oa->o_gid = ouc.ouc_fsgid;
+        }
+
         LASSERT(sizeof(*aa) <= sizeof(req->rq_async_args));
         aa = (struct osc_brw_async_args *)&req->rq_async_args;
         aa->aa_oa = oa;
@@ -1891,7 +1938,7 @@ static int osc_queue_async_io(struct obd_export *exp, struct lov_stripe_md *lsm,
         struct client_obd *cli = &exp->exp_obd->u.cli;
         struct osc_async_page *oap;
         struct loi_oap_pages *lop;
-        int rc;
+        int rc = 0;
         ENTRY;
 
         oap = oap_from_cookie(cookie);
@@ -1906,6 +1953,25 @@ static int osc_queue_async_io(struct obd_export *exp, struct lov_stripe_md *lsm,
             !list_empty(&oap->oap_rpc_item))
                 RETURN(-EBUSY);
 
+        /* check if the file's owner/group is over quota */
+        if (cmd == OBD_BRW_WRITE){
+                struct obd_async_page_ops *ops;
+                struct obdo *oa = NULL;
+                                                                                                                             
+                oa = obdo_alloc();
+                if (oa == NULL)
+                        RETURN(-ENOMEM);
+
+                ops = oap->oap_caller_ops;
+                ops->ap_fill_obdo(oap->oap_caller_data, cmd, oa);
+                if (osc_get_quota_flag(cli, oa->o_uid, oa->o_gid) == NO_QUOTA)
+                        rc = -EDQUOT;
+
+                obdo_free(oa);
+                if (rc)
+                        RETURN(rc);
+        }
+
         if (loi == NULL)
                 loi = &lsm->lsm_oinfo[0];
 
@@ -2714,6 +2780,98 @@ static int osc_getstripe(struct lov_stripe_md *lsm, struct lov_user_md *lump)
         RETURN(rc);
 }
 
+/* Send an OST_QUOTACHECK request carrying a copy of @oqctl and wait for
+ * it to complete.
+ *
+ * cl_qchk_stat is set to CL_QUOTACHECKING (under cl_qchk_lock) before
+ * the request is queued; if the RPC fails, the error code is stored
+ * there instead.
+ *
+ * Returns the result of ptlrpc_queue_wait(), or -ENOMEM if the request
+ * cannot be allocated.
+ */
+static int osc_quotacheck(struct obd_export *exp, struct obd_quotactl *oqctl)
+{
+        struct client_obd *cli = &exp->exp_obd->u.cli;
+        struct ptlrpc_request *req;
+        struct obd_quotactl *qbody;
+        int bufsz = sizeof(*qbody);
+        int rc;
+        ENTRY;
+
+        req = ptlrpc_prep_req(class_exp2cliimp(exp), OST_QUOTACHECK, 1,
+                              &bufsz, NULL);
+        if (req == NULL)
+                GOTO(out, rc = -ENOMEM);
+
+        qbody = lustre_msg_buf(req->rq_reqmsg, 0, sizeof(*qbody));
+        memcpy(qbody, oqctl, sizeof(*qbody));
+
+        /* the reply carries no buffers */
+        req->rq_replen = lustre_msg_size(0, NULL);
+
+        /* publish "check in progress" before the request goes out */
+        spin_lock(&cli->cl_qchk_lock);
+        cli->cl_qchk_stat = CL_QUOTACHECKING;
+        spin_unlock(&cli->cl_qchk_lock);
+
+        rc = ptlrpc_queue_wait(req);
+        if (rc != 0) {
+                /* record the failure for whoever reads cl_qchk_stat */
+                spin_lock(&cli->cl_qchk_lock);
+                cli->cl_qchk_stat = rc;
+                spin_unlock(&cli->cl_qchk_lock);
+        }
+ out:
+        ptlrpc_req_finished(req);
+        RETURN (rc);
+}
+
+/* Report the state of a quotacheck on this OSC into @qchk.
+ *
+ * Reads cl_qchk_stat under cl_qchk_lock and translates it:
+ *  - CL_QUOTACHECKING: qchk->stat = -ENODATA, return 0;
+ *  - zero: qchk->stat = 0, return 0;
+ *  - any other value: the value is reported in qchk->stat and returned,
+ *    except that values above CL_QUOTACHECKING are replaced by -EINTR;
+ *    qchk->obd_type and qchk->obd_uuid are filled in as well.
+ */
+static int osc_poll_quotacheck(struct obd_export *exp,
+                                  struct if_quotacheck *qchk)
+{
+        struct client_obd *cli = &exp->exp_obd->u.cli;
+        int stat;
+        ENTRY;
+                                                                                                                 
+        /* take a snapshot of the status word under its lock */
+        spin_lock(&cli->cl_qchk_lock);
+        stat = cli->cl_qchk_stat;
+        spin_unlock(&cli->cl_qchk_lock);
+                                                                                                                 
+        qchk->stat = stat;
+        if (stat == CL_QUOTACHECKING) {
+                /* osc_quotacheck() stores this value before sending its
+                 * request; presumably the check is still running -- verify
+                 * against whoever updates cl_qchk_stat on completion */
+                qchk->stat = -ENODATA;
+                stat = 0;
+        } else if (qchk->stat) {
+                if (qchk->stat > CL_QUOTACHECKING)
+                        qchk->stat = stat = -EINTR;
+                                                                                                                 
+                /* identify the target this status refers to */
+                strncpy(qchk->obd_type, "obdfilter", 10);
+                qchk->obd_uuid = cli->cl_import->imp_target_uuid;
+        }
+        RETURN(stat);
+}
+
+/* Send an OST_QUOTACTL request carrying a copy of @oqctl, wait for the
+ * reply and copy the returned obd_quotactl back into @oqctl.
+ *
+ * Returns 0 on success, -ENOMEM if the request cannot be allocated,
+ * the error from ptlrpc_queue_wait(), or -EPROTO if the reply buffer
+ * cannot be unpacked.
+ */
+static int osc_quotactl(struct obd_export *exp, struct obd_quotactl *oqctl)
+{
+        struct ptlrpc_request *req;
+        struct obd_quotactl *oqc;
+        int size = sizeof(*oqctl);
+        int rc;
+        ENTRY;
+
+        req = ptlrpc_prep_req(class_exp2cliimp(exp), OST_QUOTACTL, 1, &size,
+                              NULL);
+        if (!req)
+                GOTO(out, rc = -ENOMEM);
+
+        memcpy(lustre_msg_buf(req->rq_reqmsg, 0, sizeof (*oqctl)), oqctl, size);
+
+        /* the reply carries one obd_quotactl-sized buffer */
+        req->rq_replen = lustre_msg_size(1, &size);
+
+        rc = ptlrpc_queue_wait(req);
+        if (!rc) {
+                oqc = lustre_swab_repbuf(req, 0, sizeof (*oqc),
+                                         lustre_swab_obd_quotactl);
+                if (oqc == NULL) {
+                        /* name the structure actually being unpacked; the
+                         * message used to say mds_body */
+                        CERROR ("Can't unpack obd_quotactl\n");
+                        GOTO(out, rc = -EPROTO);
+                }
+
+                /* hand the server's answer back to the caller */
+                memcpy(oqctl, oqc, sizeof(*oqctl));
+        }
+out:
+        ptlrpc_req_finished(req);
+        RETURN (rc);
+}
+
 static int osc_iocontrol(unsigned int cmd, struct obd_export *exp, int len,
                          void *karg, void *uarg)
 {
@@ -2788,6 +2946,9 @@ static int osc_iocontrol(unsigned int cmd, struct obd_export *exp, int len,
                 err = ptlrpc_set_import_active(obd->u.cli.cl_import,
                                                data->ioc_offset);
                 GOTO(out, err);
+        case OBD_IOC_POLL_QUOTACHECK:
+                err = osc_poll_quotacheck(exp, (struct if_quotacheck *)karg);
+                GOTO(out, err);
         default:
                 CDEBUG(D_INODE, "unrecognised ioctl %#x by %s\n", cmd, current->comm);
                 GOTO(out, err = -ENOTTY);
@@ -2909,7 +3070,7 @@ static int osc_set_info(struct obd_export *exp, obd_count keylen,
         rc = ptlrpc_queue_wait(req);
         ptlrpc_req_finished(req);
 
-        ctxt = llog_get_context(exp->exp_obd, LLOG_UNLINK_ORIG_CTXT);
+        ctxt = llog_get_context(exp->exp_obd, LLOG_MDS_OST_ORIG_CTXT);
         if (ctxt) {
                 if (rc == 0)
                         rc = llog_initiator_connect(ctxt);
@@ -2930,21 +3091,21 @@ static struct llog_operations osc_size_repl_logops = {
         lop_cancel: llog_obd_repl_cancel
 };
 
-static struct llog_operations osc_unlink_orig_logops;
+static struct llog_operations osc_mds_ost_orig_logops;
 static int osc_llog_init(struct obd_device *obd, struct obd_device *tgt,
                         int count, struct llog_catid *catid)
 {
         int rc;
         ENTRY;
 
-        osc_unlink_orig_logops = llog_lvfs_ops;
-        osc_unlink_orig_logops.lop_setup = llog_obd_origin_setup;
-        osc_unlink_orig_logops.lop_cleanup = llog_obd_origin_cleanup;
-        osc_unlink_orig_logops.lop_add = llog_obd_origin_add;
-        osc_unlink_orig_logops.lop_connect = llog_origin_connect;
+        osc_mds_ost_orig_logops = llog_lvfs_ops;
+        osc_mds_ost_orig_logops.lop_setup = llog_obd_origin_setup;
+        osc_mds_ost_orig_logops.lop_cleanup = llog_obd_origin_cleanup;
+        osc_mds_ost_orig_logops.lop_add = llog_obd_origin_add;
+        osc_mds_ost_orig_logops.lop_connect = llog_origin_connect;
 
-        rc = llog_setup(obd, LLOG_UNLINK_ORIG_CTXT, tgt, count,
-                        &catid->lci_logid, &osc_unlink_orig_logops);
+        rc = llog_setup(obd, LLOG_MDS_OST_ORIG_CTXT, tgt, count,
+                        &catid->lci_logid, &osc_mds_ost_orig_logops);
         if (rc)
                 RETURN(rc);
 
@@ -2959,7 +3120,7 @@ static int osc_llog_finish(struct obd_device *obd, int count)
         int rc = 0, rc2 = 0;
         ENTRY;
 
-        ctxt = llog_get_context(obd, LLOG_UNLINK_ORIG_CTXT);
+        ctxt = llog_get_context(obd, LLOG_MDS_OST_ORIG_CTXT);
         if (ctxt)
                 rc = llog_cleanup(ctxt);
 
@@ -2972,7 +3133,6 @@ static int osc_llog_finish(struct obd_device *obd, int count)
         RETURN(rc);
 }
 
-
 static int osc_disconnect(struct obd_export *exp)
 {
         struct obd_device *obd = class_exp2obd(exp);
@@ -3079,6 +3239,7 @@ int osc_setup(struct obd_device *obd, obd_count len, void *buf)
 int osc_cleanup(struct obd_device *obd)
 {
         struct osc_creator *oscc = &obd->u.cli.cl_oscc;
+        struct client_obd *cli = &obd->u.cli;
         int rc;
 
         ptlrpc_lprocfs_unregister_obd(obd);
@@ -3088,6 +3249,9 @@ int osc_cleanup(struct obd_device *obd)
         oscc->oscc_flags &= ~OSCC_FLAG_RECOVERING;
         oscc->oscc_flags |= OSCC_FLAG_EXITING;
         spin_unlock(&oscc->oscc_lock);
+        
+        /* free memory of osc quota cache */
+        osc_qinfo_cleanup(cli);
 
         rc = client_obd_cleanup(obd);
         ptlrpcd_decref();
@@ -3112,6 +3276,7 @@ struct obd_ops osc_obd_ops = {
         .o_getattr              = osc_getattr,
         .o_getattr_async        = osc_getattr_async,
         .o_setattr              = osc_setattr,
+        .o_setattr_async        = osc_setattr_async,
         .o_brw                  = osc_brw,
         .o_brw_async            = osc_brw_async,
         .o_prep_async_page      = osc_prep_async_page,
@@ -3134,6 +3299,8 @@ struct obd_ops osc_obd_ops = {
         .o_import_event         = osc_import_event,
         .o_llog_init            = osc_llog_init,
         .o_llog_finish          = osc_llog_finish,
+        .o_quotacheck           = osc_quotacheck,
+        .o_quotactl             = osc_quotactl,
 };
 
 #if (LINUX_VERSION_CODE < KERNEL_VERSION(2,5,0))
@@ -3194,6 +3361,8 @@ int __init osc_init(void)
         if (rc)
                 class_unregister_type(LUSTRE_OSC_NAME);
 #endif
+        
+        rc = osc_qinfo_init();
 
         RETURN(rc);
 }
@@ -3201,6 +3370,7 @@ int __init osc_init(void)
 #ifdef __KERNEL__
 static void /*__exit*/ osc_exit(void)
 {
+        osc_qinfo_exit();
 #if (LINUX_VERSION_CODE < KERNEL_VERSION(2,5,0))
         class_unregister_type(LUSTRE_SANOSC_NAME);
 #endif
index 44d4467..12d61cc 100644 (file)
 #include <linux/lprocfs_status.h>
 #include <linux/lustre_commit_confd.h>
 #include <libcfs/list.h>
+#include <linux/lustre_quota.h>
 #include "ost_internal.h"
 
+/* Argument block handed from ost_quotacheck() to ost_quotacheck_thread().
+ * There is a single file-scope instance, so every caller shares it. */
+static struct quotacheck_info qchkinfo;
+
 void oti_init(struct obd_trans_info *oti, struct ptlrpc_request *req)
 {
         if (oti == NULL)
@@ -913,6 +916,131 @@ static int ost_filter_recovery_request(struct ptlrpc_request *req,
         }
 }
 
+/*
+ * Send an OBD_QC_CALLBACK request carrying a copy of @oqctl over
+ * exp->exp_imp_reverse and wait for it to complete.
+ *
+ * Returns -ENOMEM if the request cannot be allocated, otherwise the result
+ * of ptlrpc_queue_wait().
+ */
+static int ost_quotacheck_callback(struct obd_export *exp,
+                                   struct obd_quotactl *oqctl)
+{
+        struct ptlrpc_request *req;
+        struct obd_quotactl *body;
+        int rc, size = sizeof(*oqctl);
+        ENTRY;  /* pairs with the RETURN() calls below */
+
+        req = ptlrpc_prep_req(exp->exp_imp_reverse, OBD_QC_CALLBACK,
+                              1, &size, NULL);
+        if (!req)
+                RETURN(-ENOMEM);
+
+        body = lustre_msg_buf(req->rq_reqmsg, 0, sizeof(*body));
+        memcpy(body, oqctl, sizeof(*oqctl));
+
+        /* reply length is computed for zero buffers */
+        req->rq_replen = lustre_msg_size(0, NULL);
+
+        rc = ptlrpc_queue_wait(req);
+        ptlrpc_req_finished(req);
+
+        RETURN(rc);
+}
+
+/*
+ * Kernel thread started by ost_quotacheck(): runs obd_quotacheck() on the
+ * export given in @data (a struct quotacheck_info), reports the outcome with
+ * ost_quotacheck_callback() and finally re-increments fo_quotachecking.
+ *
+ * Returns the result of ost_quotacheck_callback().
+ */
+static int ost_quotacheck_thread(void *data)
+{
+        unsigned long flags;
+        struct quotacheck_info *qchki = data;
+        struct obd_export *exp;
+        struct obd_quotactl oqctl;
+        struct filter_obd *filter;
+        int rc;
+
+        lock_kernel();
+        ptlrpc_daemonize();
+
+        /* block all signals for this thread */
+        SIGNAL_MASK_LOCK(current, flags);
+        sigfillset(&current->blocked);
+        RECALC_SIGPENDING;
+        SIGNAL_MASK_UNLOCK(current, flags);
+
+        THREAD_NAME(current->comm, sizeof(current->comm) - 1, "%s", "quotacheck");
+        unlock_kernel();
+
+        /* Take private copies of the arguments BEFORE signalling the starter:
+         * @data is the file-scope qchkinfo, which ost_quotacheck() refills on
+         * its next invocation, so it must not be read once complete() has let
+         * the starter return.
+         * NOTE(review): no export reference is taken here; confirm that the
+         * export cannot go away while the check is running. */
+        exp = qchki->qi_exp;
+        filter = &exp->exp_obd->u.filter;
+        memcpy(&oqctl, &qchki->qi_oqctl, sizeof(oqctl));
+
+        complete(&qchki->qi_starting);
+
+        obd_quotacheck(exp, &oqctl);
+        rc = ost_quotacheck_callback(exp, &oqctl);
+
+        /* undo the decrement done by ost_quotacheck() */
+        atomic_inc(&filter->fo_quotachecking);
+
+        return rc;
+}
+
+/*
+ * Handle an OST_QUOTACHECK request: unpack the obd_quotactl from the request,
+ * pack an empty reply and start ost_quotacheck_thread() to do the work.
+ *
+ * Returns 0 once the thread has signalled qi_starting, -EPROTO if the request
+ * buffer cannot be unpacked, -ENOMEM if the reply cannot be packed, -EBUSY if
+ * the fo_quotachecking counter does not drop to zero, or the negative value
+ * returned by kernel_thread().
+ */
+static int ost_quotacheck(struct ptlrpc_request *req)
+{
+        struct obd_device *obd = req->rq_export->exp_obd;
+        struct filter_obd *filter = &obd->u.filter;
+        struct obd_quotactl *oqctl;
+        int rc;
+        ENTRY;
+
+        oqctl = lustre_swab_reqbuf(req, 0, sizeof(*oqctl),
+                                   lustre_swab_obd_quotactl);
+        if (oqctl == NULL)
+                GOTO(out, rc = -EPROTO);
+
+        /* any packing failure is reported as -ENOMEM */
+        rc = lustre_pack_reply(req, 0, NULL, NULL);
+        if (rc) {
+                CERROR("ost: out of memory while packing quotacheck reply\n");
+                GOTO(out, rc = -ENOMEM);
+        }
+
+        /* fo_quotachecking is used as a try-lock: presumably it starts at 1,
+         * so only the caller that brings it to 0 proceeds (TODO confirm
+         * against the filter setup code); everyone else restores it */
+        if (!atomic_dec_and_test(&filter->fo_quotachecking)) {
+                atomic_inc(&filter->fo_quotachecking);
+                GOTO(out, rc = -EBUSY);
+        }
+        /* fill the shared, file-scope argument block for the thread */
+        init_completion(&qchkinfo.qi_starting);
+        qchkinfo.qi_exp = req->rq_export;
+        memcpy(&qchkinfo.qi_oqctl, oqctl, sizeof(*oqctl));
+
+        rc = kernel_thread(ost_quotacheck_thread, &qchkinfo, CLONE_VM|CLONE_FILES);
+        if (rc < 0) {
+                CERROR("%s: error starting ost_quotacheck_thread: %d\n",
+                       obd->obd_name, rc);
+                /* no thread will restore the counter, so do it here */
+                atomic_inc(&filter->fo_quotachecking);
+        } else {
+                CDEBUG(D_INFO, "%s: ost_quotacheck_thread: %d\n",
+                       obd->obd_name, rc);
+                /* wait until the thread has signalled qi_starting */
+                wait_for_completion(&qchkinfo.qi_starting);
+                rc = 0;
+        }
+
+        EXIT;
+out:
+        return rc;
+}
+
+/*
+ * Handle an OST_QUOTACTL request: copy the request's obd_quotactl into the
+ * reply buffer and run obd_quotactl() on that copy.
+ *
+ * The obd_quotactl() result is stored in req->rq_status; the return value
+ * only reports an unpack failure (-EPROTO) or a reply packing failure.
+ */
+static int ost_quotactl(struct ptlrpc_request *req)
+{
+        struct obd_quotactl *oqctl, *repoqc;
+        int rc, size = sizeof(*repoqc);
+        ENTRY;
+
+        oqctl = lustre_swab_reqbuf(req, 0, sizeof(*oqctl),
+                                   lustre_swab_obd_quotactl);
+        if (oqctl == NULL)
+                GOTO(out, rc = -EPROTO);
+
+        rc = lustre_pack_reply(req, 1, &size, NULL);
+        if (rc)
+                GOTO(out, rc);
+
+        /* operate on the reply copy so the result travels back to the sender */
+        repoqc = lustre_msg_buf(req->rq_repmsg, 0, sizeof(*repoqc));
+        memcpy(repoqc, oqctl, sizeof(*repoqc));
+
+        req->rq_status = obd_quotactl(req->rq_export, repoqc);
+out:
+        RETURN(rc);
+}
+
 static int ost_handle(struct ptlrpc_request *req)
 {
         struct obd_trans_info trans_info = { 0, };
@@ -1047,6 +1175,16 @@ static int ost_handle(struct ptlrpc_request *req)
                 DEBUG_REQ(D_INODE, req, "get_info");
                 rc = ost_get_info(req->rq_export, req);
                 break;
+        case OST_QUOTACHECK:
+                CDEBUG(D_INODE, "quotacheck\n");
+                OBD_FAIL_RETURN(OBD_FAIL_OST_QUOTACHECK_NET, 0);
+                rc = ost_quotacheck(req);
+                break;
+        case OST_QUOTACTL:
+                CDEBUG(D_INODE, "quotactl\n");
+                OBD_FAIL_RETURN(OBD_FAIL_OST_QUOTACTL_NET, 0);
+                rc = ost_quotactl(req);
+                break;
         case OBD_PING:
                 DEBUG_REQ(D_INODE, req, "ping");
                 rc = target_handle_ping(req);
index fa2fc8e..51399a9 100644 (file)
@@ -71,7 +71,7 @@ int llog_origin_connect(struct llog_ctxt *ctxt, int count,
         lgr->lgr_hdr.lrh_len = lgr->lgr_tail.lrt_len = sizeof(*lgr);
         lgr->lgr_hdr.lrh_type = LLOG_GEN_REC;
         lgr->lgr_gen = ctxt->loc_gen;
-        rc = llog_add(ctxt, &lgr->lgr_hdr, NULL, NULL, 1);
+        rc = llog_add(ctxt, &lgr->lgr_hdr, NULL, NULL, 1, NULL);
         OBD_FREE(lgr, sizeof(*lgr));
         if (rc != 1)
                 RETURN(rc);
index 5437cb4..910613f 100644 (file)
@@ -53,6 +53,8 @@ struct ll_rpc_opcode {
         { OST_SAN_WRITE,    "ost_san_write" },
         { OST_SYNC,         "ost_sync" },
         { OST_SET_INFO,     "ost_set_info" },
+        { OST_QUOTACHECK,   "ost_quotacheck" },
+        { OST_QUOTACTL,     "ost_quotactl" },
         { MDS_GETATTR,      "mds_getattr" },
         { MDS_GETATTR_NAME, "mds_getattr_name" },
         { MDS_CLOSE,        "mds_close" },
@@ -67,6 +69,8 @@ struct ll_rpc_opcode {
         { MDS_SYNC,         "mds_sync" },
         { MDS_DONE_WRITING, "mds_done_writing" },
         { MDS_SET_INFO,     "mds_set_info" },
+        { MDS_QUOTACHECK,   "mds_quotacheck" },
+        { MDS_QUOTACTL,     "mds_quotactl" },
         { LDLM_ENQUEUE,     "ldlm_enqueue" },
         { LDLM_CONVERT,     "ldlm_convert" },
         { LDLM_CANCEL,      "ldlm_cancel" },
index 955fe2f..26c354e 100644 (file)
@@ -462,6 +462,38 @@ void lustre_swab_mds_body (struct mds_body *b)
         __swab32s (&b->eadatasize);
 }
 
+/* Byte-swap, in place, the grace-time, flags and valid fields of @i. */
+static void lustre_swab_obd_dqinfo (struct obd_dqinfo *i)
+{
+        __swab64s (&i->dqi_bgrace);
+        __swab64s (&i->dqi_igrace);
+        __swab32s (&i->dqi_flags);
+        __swab32s (&i->dqi_valid);
+}
+
+/* Byte-swap, in place, the limit, usage, time and valid fields of @b
+ * (the padding word is swapped as well). */
+static void lustre_swab_obd_dqblk (struct obd_dqblk *b)
+{
+        __swab64s (&b->dqb_ihardlimit);
+        __swab64s (&b->dqb_isoftlimit);
+        __swab64s (&b->dqb_curinodes);
+        __swab64s (&b->dqb_bhardlimit);
+        __swab64s (&b->dqb_bsoftlimit);
+        __swab64s (&b->dqb_curspace);
+        __swab64s (&b->dqb_btime);
+        __swab64s (&b->dqb_itime);
+        __swab32s (&b->dqb_valid);
+        __swab32s (&b->padding);
+}
+
+/* Byte-swap an obd_quotactl in place: its four 32-bit header fields, then
+ * the embedded obd_dqinfo and obd_dqblk through their own swab helpers. */
+void lustre_swab_obd_quotactl (struct obd_quotactl *q)
+{
+        __swab32s (&q->qc_cmd);
+        __swab32s (&q->qc_type);
+        __swab32s (&q->qc_id);
+        __swab32s (&q->qc_stat);
+        lustre_swab_obd_dqinfo (&q->qc_dqinfo);
+        lustre_swab_obd_dqblk (&q->qc_dqblk);
+}
+
 void lustre_swab_mds_rec_setattr (struct mds_rec_setattr *sa)
 {
         __swab32s (&sa->sa_opcode);
@@ -681,6 +713,60 @@ void lustre_swab_ptlbd_rsp (struct ptlbd_rsp *r)
         __swab16s (&r->r_error_cnt);
 }
 
+/*
+ * Classify a log header by its record type: 1 when the type equals the
+ * byte-swapped LLOG_HDR_MAGIC, 0 when it equals LLOG_HDR_MAGIC itself and
+ * -1 when it matches neither.  (no one calls this)
+ */
+int llog_log_swabbed(struct llog_log_hdr *hdr)
+{
+        return hdr->llh_hdr.lrh_type == __swab32(LLOG_HDR_MAGIC) ? 1 :
+               hdr->llh_hdr.lrh_type == LLOG_HDR_MAGIC ? 0 : -1;
+}
+
+/* Byte-swap, in place, the log id, context index, flags, index, length and
+ * offset fields of an llogd_body. */
+void lustre_swab_llogd_body (struct llogd_body *d)
+{
+        __swab64s (&d->lgd_logid.lgl_oid);
+        __swab64s (&d->lgd_logid.lgl_ogr);
+        __swab32s (&d->lgd_logid.lgl_ogen);
+        __swab32s (&d->lgd_ctxt_idx);
+        __swab32s (&d->lgd_llh_flags);
+        __swab32s (&d->lgd_index);
+        __swab32s (&d->lgd_saved_index);
+        __swab32s (&d->lgd_len);
+        __swab64s (&d->lgd_cur_offset);
+}
+
+/* Byte-swap, in place, the record header, timestamp, count, bitmap offset,
+ * flags and record tail of an llog_log_hdr.
+ * NOTE(review): only the fields listed here are swapped; confirm that the
+ * structure has no other multi-byte members that need it. */
+void lustre_swab_llog_hdr (struct llog_log_hdr *h)
+{
+        __swab32s (&h->llh_hdr.lrh_index);
+        __swab32s (&h->llh_hdr.lrh_len);
+        __swab32s (&h->llh_hdr.lrh_type);
+        __swab64s (&h->llh_timestamp);
+        __swab32s (&h->llh_count);
+        __swab32s (&h->llh_bitmap_offset);
+        __swab32s (&h->llh_flags);
+        __swab32s (&h->llh_tail.lrt_index);
+        __swab32s (&h->llh_tail.lrt_len);
+}
+
+/* Byte-swap, in place, the generation counters, log id and context index of
+ * an llogd_conn_body. */
+void lustre_swab_llogd_conn_body (struct llogd_conn_body *d)
+{
+        __swab64s (&d->lgdc_gen.mnt_cnt);
+        __swab64s (&d->lgdc_gen.conn_cnt);
+        __swab64s (&d->lgdc_logid.lgl_oid);
+        __swab64s (&d->lgdc_logid.lgl_ogr);
+        __swab32s (&d->lgdc_logid.lgl_ogen);
+        __swab32s (&d->lgdc_ctxt_idx);
+}
+
+/* Byte-swap, in place, the four 32-bit fields of a qunit_data. */
+void lustre_swab_qdata(struct qunit_data *d)
+{
+        __swab32s (&d->qd_id);
+        __swab32s (&d->qd_type);
+        __swab32s (&d->qd_count);
+        __swab32s (&d->qd_isblk);
+}
+
 void lustre_assert_wire_constants(void)
 {
         /* Wire protocol assertions generated by 'wirecheck'
@@ -745,7 +831,11 @@ void lustre_assert_wire_constants(void)
                  (long long)OST_SAN_WRITE);
         LASSERTF(OST_SYNC == 16, " found %lld\n",
                  (long long)OST_SYNC);
-        LASSERTF(OST_LAST_OPC == 18, " found %lld\n",
+        LASSERTF(OST_QUOTACHECK == 18, " found %lld\n",
+                 (long long)OST_QUOTACHECK);
+        LASSERTF(OST_QUOTACTL == 19, " found %lld\n",
+                 (long long)OST_QUOTACTL);
+        LASSERTF(OST_LAST_OPC == 20, " found %lld\n",
                  (long long)OST_LAST_OPC);
         LASSERTF(OBD_OBJECT_EOF == 0xffffffffffffffffULL," found %lld\n",
                  (long long)OBD_OBJECT_EOF);
@@ -779,7 +869,11 @@ void lustre_assert_wire_constants(void)
                  (long long)MDS_DONE_WRITING);
         LASSERTF(MDS_SET_INFO == 46, " found %lld\n",
                  (long long)MDS_SET_INFO);
-        LASSERTF(MDS_LAST_OPC == 47, " found %lld\n",
+        LASSERTF(MDS_QUOTACHECK == 47, " found %lld\n",
+                 (long long)MDS_QUOTACHECK);
+        LASSERTF(MDS_QUOTACTL == 48, " found %lld\n",
+                 (long long)MDS_QUOTACTL);
+        LASSERTF(MDS_LAST_OPC == 49, " found %lld\n",
                  (long long)MDS_LAST_OPC);
         LASSERTF(REINT_SETATTR == 1, " found %lld\n",
                  (long long)REINT_SETATTR);
@@ -861,7 +955,9 @@ void lustre_assert_wire_constants(void)
                  (long long)OBD_PING);
         LASSERTF(OBD_LOG_CANCEL == 401, " found %lld\n",
                  (long long)OBD_LOG_CANCEL);
-        LASSERTF(OBD_LAST_OPC == 402, " found %lld\n",
+        LASSERTF(OBD_QC_CALLBACK == 402, " found %lld\n",
+                 (long long)OBD_QC_CALLBACK);
+        LASSERTF(OBD_LAST_OPC == 403, " found %lld\n",
                  (long long)OBD_LAST_OPC);
         /* Sizes and Offsets */
 
@@ -1847,6 +1943,8 @@ void lustre_assert_wire_constants(void)
                  (long long)OST_RAID1_REC);
         LASSERTF(MDS_UNLINK_REC == 274801668, " found %lld\n",
                  (long long)MDS_UNLINK_REC);
+        LASSERTF(MDS_SETATTR_REC == 274801665, " found %lld\n",
+                 (long long)MDS_SETATTR_REC);
         LASSERTF(OBD_CFG_REC == 274857984, " found %lld\n",
                  (long long)OBD_CFG_REC);
         LASSERTF(PTL_CFG_REC == 274923520, " found %lld\n",
index b4fd554..4629b71 100644 (file)
@@ -157,6 +157,7 @@ EXPORT_SYMBOL(lustre_swab_ost_last_id);
 EXPORT_SYMBOL(lustre_swab_ost_lvb);
 EXPORT_SYMBOL(lustre_swab_mds_status_req);
 EXPORT_SYMBOL(lustre_swab_mds_body);
+EXPORT_SYMBOL(lustre_swab_obd_quotactl);
 EXPORT_SYMBOL(lustre_swab_mds_rec_setattr);
 EXPORT_SYMBOL(lustre_swab_mds_rec_create);
 EXPORT_SYMBOL(lustre_swab_mds_rec_link);
@@ -175,6 +176,7 @@ EXPORT_SYMBOL(lustre_swab_ldlm_reply);
 EXPORT_SYMBOL(lustre_swab_ptlbd_op);
 EXPORT_SYMBOL(lustre_swab_ptlbd_niob);
 EXPORT_SYMBOL(lustre_swab_ptlbd_rsp);
+EXPORT_SYMBOL(lustre_swab_qdata);
 
 /* recover.c */
 EXPORT_SYMBOL(ptlrpc_run_recovery_over_upcall);
index e9ddfa7..ca6683b 100755 (executable)
@@ -16,7 +16,7 @@ MOUNT=${MOUNT:-/mnt/lustre}
 MOUNT2=${MOUNT2:-${MOUNT}2}
 NETTYPE=${NETTYPE:-tcp}
 
-OSTCOUNT=${OSTCOUNT:-5}
+OSTCOUNT=${OSTCOUNT:-2}
 # OSTDEVN will still override the device for OST N
 
 OSTSIZE=${OSTSIZE:-150000}
diff --git a/lustre/tests/quota_sanity.sh b/lustre/tests/quota_sanity.sh
new file mode 100644 (file)
index 0000000..9261668
--- /dev/null
@@ -0,0 +1,310 @@
+#!/bin/bash
+
+set -e
+#set -vx
+
+SRCDIR=`dirname $0`
+export PATH=$PWD/$SRCDIR:$SRCDIR:$SRCDIR/../utils:$PATH:/sbin
+. $SRCDIR/test-framework.sh
+
+LFS=${LFS:-lfs}
+LCTL=${LCTL:-lctl}
+USER="quota_usr"
+TSTID=${TSTID:-60000}
+RUNAS=${RUNAS:-"runas -u $TSTID"}
+BLK_SZ=1024
+BUNIT_SZ=10 # 10 quota blocks
+BTUNE_SZ=5  # 5 quota blocks
+IUNIT_SZ=10 # 10 files
+ITUNE_SZ=5  # 5 files
+
+MOUNT="`cat /proc/mounts | grep "lustre" | awk '{print $2}'`"
+if [ -z "$MOUNT" ]; then
+       echo "ERROR: lustre not mounted, quit test!"
+       exit 1;
+fi
+OSTCOUNT=`cat /proc/fs/lustre/lov/*/activeobd | head -n 1`
+TSTDIR="$MOUNT/quota_dir"
+
+# set_blk_tunables(bunit_sz, btune_sz)
+set_blk_tunables() {
+       # set bunit and btune size on all obdfilters
+       for i in `ls /proc/fs/lustre/obdfilter/*/quota_btune_sz`; do
+               echo $(($2 * $BLK_SZ)) > $i
+       done
+       for i in `ls /proc/fs/lustre/obdfilter/*/quota_bunit_sz`; do
+               echo $(($1 * $BLK_SZ)) > $i
+       done;
+       # set bunit and btune size on mds
+       for i in `ls /proc/fs/lustre/mds/mds*/quota_btune_sz`; do
+               echo $(($2 * $BLK_SZ)) > $i
+       done
+       for i in `ls /proc/fs/lustre/mds/mds*/quota_bunit_sz`; do
+               echo $(($1 * $BLK_SZ)) > $i
+       done
+}
+
+# set_file_tunables(iunit_sz, itune_sz)
+set_file_tunables() {
+       # set iunit and itune size on all obdfilters
+       for i in `ls /proc/fs/lustre/obdfilter/*/quota_itune_sz`; do
+               echo $2 > $i
+       done
+       for i in `ls /proc/fs/lustre/obdfilter/*/quota_iunit_sz`; do
+               echo $1 > $i
+       done;
+       # set iunit and itune size on mds
+       for i in `ls /proc/fs/lustre/mds/mds*/quota_itune_sz`; do
+               echo $2 > $i
+       done
+       for i in `ls /proc/fs/lustre/mds/mds*/quota_iunit_sz`; do
+               echo $1 > $i
+       done
+}
+
+prepare_test() {
+       # create test group
+       GRP="`cat /etc/group | grep "$USER" | awk -F: '{print $1}'`"
+       if [ -z "$GRP" ]; then
+               groupadd -g $TSTID "$USER"
+       fi
+       TSTID="`cat /etc/group | grep "$USER" | awk -F: '{print $3}'`"
+
+       # create test user
+       USR="`cat /etc/passwd | grep "$USER" | awk -F: '{print $1}'`"
+       if [ -z "$USR" ]; then
+               useradd -u $TSTID -g $TSTID -d /tmp "$USER"
+       fi
+       
+       RUNAS="runas -u $TSTID"
+       # set block tunables
+
+       set_blk_tunables $BUNIT_SZ $BTUNE_SZ
+       # set file tunaables
+       set_file_tunables $IUNIT_SZ $ITUNE_SZ
+
+       [ -d $TSTDIR ] || mkdir $TSTDIR 
+       chmod 777 $TSTDIR
+}
+
+# Undo prepare_test: remove the test user and group, write larger unit/tune
+# sizes back to the quota tunables and delete the test directory.
+# NOTE(review): the script runs under `set -e`, so a failing userdel or
+# groupdel stops the function before the tunables are restored.
+cleanup_test() {
+       # delete test user and group
+       userdel "$USER"
+       groupdel "$USER"
+       
+       # restore block tunables to default size
+       set_blk_tunables $((1024 * 100)) $((1024 * 50))
+       # restore file tunables to default size
+       set_file_tunables 5000 2500 
+
+       rm -fr $TSTDIR
+}
+
+# set quota
+# Turns user and group quota off, then runs quotacheck for both types.
+# NOTE(review): presumably quotacheck leaves quota enabled when it finishes,
+# which is what the "Enable quota" banner relies on -- confirm.
+test_1() {
+       echo "== Enable quota"
+       $LFS quotaoff -ug $MOUNT
+       $LFS quotacheck -ug $MOUNT
+       return 0
+}
+
+# block hard limit (normal use and out of quota)
+test_2() {
+       echo "== Block hard limit"
+       LIMIT=$(( $BUNIT_SZ * $(($OSTCOUNT + 1)) * 10)) # 10 bunits each sever
+       TESTFILE="$TSTDIR/quota_tst20"
+       
+       echo "  User quota (limit: $LIMIT bytes)"
+       $LFS setquota -u $USER 0 $LIMIT 0 0 $MOUNT
+
+       $RUNAS touch $TESTFILE >/dev/null 2>&1
+       
+       echo "    Write ..."
+       $RUNAS dd if=/dev/zero of=$TESTFILE bs=$BLK_SZ count=$(($LIMIT/2)) > /dev/null 2>&1 || error "(usr) write failure, but expect success"
+       echo "    Done"
+       echo "    Write out of block quota ..."
+       $RUNAS dd if=/dev/zero of=$TESTFILE bs=$BLK_SZ count=$(($LIMIT/2)) seek=$(($LIMIT/2)) > /dev/null 2>&1
+       # flush cache, ensure noquota flag is setted on client
+       sync; sleep 1; sync;
+       $RUNAS dd if=/dev/zero of=$TESTFILE bs=$BLK_SZ count=$BUNIT_SZ seek=$LIMIT > /dev/null 2>&1 && error "(usr) write success, but expect EDQUOT"
+       echo "    EDQUOT"
+
+       rm -f $TESTFILE
+       
+       echo "  Group quota (limit: $LIMIT bytes)"
+       $LFS setquota -u $USER 0 0 0 0 $MOUNT           # clear user limit
+       $LFS setquota -g $USER 0 $LIMIT 0 0 $MOUNT
+       TESTFILE="$TSTDIR/quota_tst21"
+
+       $RUNAS touch $TESTFILE >/dev/null 2>&1
+
+       echo "    Write ..."
+       $RUNAS dd if=/dev/zero of=$TESTFILE bs=$BLK_SZ count=$(($LIMIT/2)) > /dev/null 2>&1 || error "(grp) write failure, but expect success"
+       echo "    Done"
+       echo "    Write out of block quota ..."
+       $RUNAS dd if=/dev/zero of=$TESTFILE bs=$BLK_SZ count=$(($LIMIT/2)) seek=$(($LIMIT/2)) > /dev/null 2>&1
+       sync; sleep 1; sync;
+       $RUNAS dd if=/dev/zero of=$TESTFILE bs=$BLK_SZ count=$BUNIT_SZ seek=$LIMIT > /dev/null 2>&1 && error "(grp) write success, but expect EDQUOT"
+       echo "    EDQUOT"
+
+       # cleanup
+       rm -f $TESTFILE
+       $LFS setquota -g $USER 0 0 0 0 $MOUNT
+       return 0
+}
+
+# file hard limit (normal use and out of quota)
+test_3() {
+       echo "== File hard limit"
+       LIMIT=$(($IUNIT_SZ * 10)) # 10 iunits on mds
+       TESTFILE="$TSTDIR/quota_tst30"
+
+       echo "  User quota (limit: $LIMIT files)"
+       $LFS setquota -u $USER 0 0 0 $LIMIT $MOUNT
+
+       echo "    Create $LIMIT files ..."
+       for i in `seq ${LIMIT}`; do
+               $RUNAS touch ${TESTFILE}_$i > /dev/null 2>&1 || error "(usr) touch failure, but except success"
+       done
+       echo "    Done"
+       echo "    Create out of file quota ..."
+       $RUNAS touch ${TESTFILE}_xxx > /dev/null 2>&1 && error "(usr) touch success, but expect EDQUOT"
+       echo "    EDQUOT"
+
+       for i in `seq ${LIMIT}`; do
+               rm -f ${TESTFILE}_$i
+       done
+
+       echo "  Group quota (limit: $LIMIT files)"
+       $LFS setquota -u $USER 0 0 0 0 $MOUNT           # clear user limit
+       $LFS setquota -g $USER 0 0 0 $LIMIT $MOUNT
+       TESTFILE="$TSTDIR/quota_tst31"
+
+       echo "    Create $LIMIT files ..."
+       for i in `seq ${LIMIT}`; do
+               $RUNAS touch ${TESTFILE}_$i > /dev/null 2>&1 || error "(grp) touch failure, but expect success"
+       done
+       echo "    Done"
+       echo "    Create out of file quota ..."
+       $RUNAS touch ${TESTFILE}_xxx > /dev/null 2>&1 && error "(grp) touch success, but expect EDQUOT"
+       echo "    EDQUOT"
+
+       # cleanup
+       for i in `seq ${LIMIT}`; do
+               rm -f ${TESTFILE}_$i
+       done
+       $LFS setquota -g $USER 0 0 0 0 $MOUNT
+       return 0
+}
+
+# block soft limit (start timer, timer goes off, stop timer)
+test_4() {
+       echo "== Block soft limit"
+       echo "  ** skipped"
+       return 0
+}
+
+# file soft limit (start timer, timer goes off, stop timer)
+test_5() {
+       echo "== File soft limit"
+       echo "  ** skipped"
+       return 0
+}
+
+# chown & chgrp (chown & chgrp successfully even out of block/file quota)
+test_6() {
+       echo "== Chown/Chgrp ignore quota"
+       BLIMIT=$(( $BUNIT_SZ * $((OSTCOUNT + 1)) * 10)) # 10 bunits on each server
+       ILIMIT=$(( $IUNIT_SZ * 10 )) # 10 iunits on mds
+       
+       echo "  Set quota limit (0 $BLIMIT 0 $ILIMIT) for $USER.$USER"
+       $LFS setquota -u $USER 0 $BLIMIT 0 $ILIMIT $MOUNT
+       $LFS setquota -g $USER 0 $BLIMIT 0 $ILIMIT $MOUNT
+       
+       echo "  Create more than $ILIMIT files and alloc more than $BLIMIT blocks ..."
+       for i in `seq $(($ILIMIT + 1))`; do
+               touch $TSTDIR/quota_tst60_$i > /dev/null 2>&1 || error "touch failure, expect success"
+       done
+       dd if=/dev/zero of=$TSTDIR/quota_tst60_1 bs=$BLK_SZ count=$(($BLIMIT+1)) > /dev/null 2>&1 || error "write failure, expect success"
+
+       echo "  Chown files to $USER.$USER ..."
+       for i in `seq $(($ILIMIT + 1))`; do
+               chown $USER.$USER $TSTDIR/quota_tst60_$i > /dev/null 2>&1 || error "chown failure, but expect success"
+       done
+
+       # cleanup
+       for i in `seq $(($ILIMIT + 1))`; do
+               rm -f $TSTDIR/quota_tst60_$i
+       done
+       $LFS setquota -u $USER 0 0 0 0 $MOUNT
+       $LFS setquota -g $USER 0 0 0 0 $MOUNT
+       return 0
+}
+
+# block quota acquire & release
+test_7() {
+       echo "== Block quota acqurie / release"
+
+       if [ $OSTCOUNT -lt 2 ]; then
+               echo "WARN: too few osts, skip this test."
+               return 0;
+       fi
+
+       LIMIT=$(($BUNIT_SZ * $(($OSTCOUNT + 1)) * 10)) # 10 bunits per server
+       FILEA="$TSTDIR/quota_tst70_a"
+       FILEB="$TSTDIR/quota_tst70_b"
+       
+       echo "  Set block limit $LIMIT bytes to $USER.$USER"
+       $LFS setquota -u $USER 0 $LIMIT 0 0 $MOUNT
+       $LFS setquota -g $USER 0 $LIMIT 0 0 $MOUNT
+
+       echo "  Create filea on OST0 and fileb on OST1"
+       $LFS setstripe $FILEA 65536 0 1
+       $LFS setstripe $FILEB 65536 1 1
+       chown $USER.$USER $FILEA
+       chown $USER.$USER $FILEB
+
+       echo "  Exceed quota limit ..."
+       $RUNAS dd if=/dev/zero of=$FILEB bs=$BLK_SZ count=$(($LIMIT - $BUNIT_SZ * $OSTCOUNT)) >/dev/null 2>&1 || error "write fileb failure, but expect success"
+       sync; sleep 1; sync;
+       $RUNAS dd if=/dev/zero of=$FILEB bs=$BLK_SZ seek=$LIMIT count=$BUNIT_SZ >/dev/null 2>&1 && error "write fileb success, but expect EDQUOT"
+       sync; sleep 1; sync;
+       echo "  Write to OST0 return EDQUOT"
+       # this write of OST0 is cache write, will success
+       $RUNAS dd if=/dev/zero of=$FILEA bs=$BLK_SZ count=$(($BUNIT_SZ * 2)) >/dev/null 2>&1 || error "write filea failure, but expect success"
+       sync; sleep 1; sync;
+       $RUNAS dd if=/dev/zero of=$FILEA bs=$BLK_SZ count=$(($BUNIT_SZ * 2)) seek=$(($BUNIT_SZ *2)) >/dev/null 2>&1 && error "write filea success, but expect EDQUOT"
+       echo "  EDQUOT"
+
+       echo "  Remove fileb to let OST1 release quota"
+       rm -f $FILEB
+
+       echo "  Write to OST0"
+       $RUNAS dd if=/dev/zero of=$FILEA bs=$BLK_SZ count=$(($LIMIT - $BUNIT_SZ * $OSTCOUNT)) >/dev/null 2>&1 || error "write filea failure, expect success"
+       echo "  Done"
+
+       # cleanup
+       rm -f $FILEA
+       $LFS setquota -u $USER 0 0 0 0 $MOUNT
+       $LFS setquota -g $USER 0 0 0 0 $MOUNT
+       return 0
+}
+
+# turn off quota
+test_8()
+{
+       echo "=== Turn off quota"
+       $LFS quotaoff $MOUNT
+       return 0
+}
+       
+prepare_test
+
+# run all tests
+for j in `seq 8`; do
+       test_$j
+       echo "== Done"
+       echo " "
+done
+
+cleanup_test
index ef76583..98a7c03 100755 (executable)
@@ -1002,5 +1002,33 @@ test_56() {
 }
 run_test 56 "don't replay a symlink open request (3440)"
 
+# Recover one mds-ost setattr from the llog: set the fail_loc below on the
+# mds, create a file, fail the mds over and check that the file is still a
+# regular file afterwards.
+# NOTE(review): when the checkstat fails the function returns before
+# fail_loc is reset to 0 -- confirm that the caller cleans this up.
+test_57() {
+#define OBD_FAIL_MDS_OST_SETATTR       0x12c
+    do_facet mds "sysctl -w lustre.fail_loc=0x8000012c"
+    touch $DIR/$tfile
+    replay_barrier mds
+    fail mds
+    sleep 1
+    $CHECKSTAT -t file $DIR/$tfile || return 1
+    do_facet mds "sysctl -w lustre.fail_loc=0x0"
+    rm $DIR/$tfile
+}
+run_test 57 "test recovery from llog for setattr op"
+
+# Recover many mds-ost setattrs from the llog: same sequence as the single
+# file case, but with 30000 files made by createmany.
+# NOTE(review): "$DIR/$tfile-*" expands to 30000 paths on one command line
+# for both checkstat and rm; confirm this stays under the argument-size
+# limit of the systems the test runs on.
+test_58() {
+#define OBD_FAIL_MDS_OST_SETATTR       0x12c
+    do_facet mds "sysctl -w lustre.fail_loc=0x8000012c"
+    createmany -o $DIR/$tfile-%d 30000
+    replay_barrier mds
+    fail mds
+    sleep 2
+    $CHECKSTAT -t file $DIR/$tfile-* || return 1
+    do_facet mds "sysctl -w lustre.fail_loc=0x0"
+    rm -f $DIR/$tfile-*
+}
+run_test 58 "test recovery from llog for setattr op (test llog_gen_rec)"
+
 equals_msg test complete, cleaning up
 $CLEANUP
diff --git a/lustre/tests/run-quotacheck.sh b/lustre/tests/run-quotacheck.sh
new file mode 100644 (file)
index 0000000..85c5b79
--- /dev/null
@@ -0,0 +1,30 @@
+#!/bin/bash
+PATH=`dirname $0`:`dirname $0`/../utils:$PATH
+TMP=${TMP:-/tmp}
+
+OBD=${1:-obdfilter}
+TARGET=`ls /proc/fs/lustre/$OBD | grep -v num_refs | head -n 1`
+[ -z "$TARGET" ] && echo "no TARGET available, skipping quotacheck test" && exit 0
+
+insmod ../lvfs/quotacheck_test.ko || exit 1
+lctl modules > $TMP/ogdb-`hostname`
+echo "NOW reload debugging syms.."
+
+RC=0
+lctl <<EOT || RC=2
+newdev
+attach quotacheck_test qchk_name qchk_uuid
+setup $TARGET
+EOT
+
+# Using ignore_errors will allow lctl to cleanup even if the test fails.
+lctl <<EOC
+cfg_device qchk_name
+ignore_errors
+cleanup
+detach
+EOC
+rmmod quotacheck_test || RC2=3
+[ $RC -eq 0 -a "$RC2" ] && RC=$RC2
+
+exit $RC
diff --git a/lustre/tests/run-quotactl.sh b/lustre/tests/run-quotactl.sh
new file mode 100644 (file)
index 0000000..74d6bce
--- /dev/null
@@ -0,0 +1,30 @@
+#!/bin/bash
+PATH=`dirname $0`:`dirname $0`/../utils:$PATH
+TMP=${TMP:-/tmp}
+OBD=${1:-obdfilter}
+
+TARGET=`ls /proc/fs/lustre/$OBD | grep -v num_refs | head -n 1`
+[ -z "$TARGET" ] && echo "no TARGET available, skipping quotactl test" && exit 0
+
+insmod ../lvfs/quotactl_test.ko || exit 1
+lctl modules > $TMP/ogdb-`hostname`
+echo "NOW reload debugging syms.."
+
+RC=0
+lctl <<EOT || RC=2
+newdev
+attach quotactl_test qctl_name qctl_uuid
+setup $TARGET
+EOT
+
+# Using ignore_errors will allow lctl to cleanup even if the test fails.
+lctl <<EOC
+cfg_device qctl_name
+ignore_errors
+cleanup
+detach
+EOC
+rmmod quotactl_test || RC2=3
+[ $RC -eq 0 -a "$RC2" ] && RC=$RC2
+
+exit $RC
diff --git a/lustre/tests/run-quotafmt.sh b/lustre/tests/run-quotafmt.sh
new file mode 100644 (file)
index 0000000..989cfa5
--- /dev/null
@@ -0,0 +1,29 @@
+#!/bin/bash
+PATH=`dirname $0`:`dirname $0`/../utils:$PATH
+TMP=${TMP:-/tmp}
+
+MDS=`ls /proc/fs/lustre/mds | grep -v num_refs | head -n 1`
+[ -z "$MDS" ] && echo "no MDS available, skipping quotafile test" && exit 0
+
+insmod ../ldiskfs/quotafmt_test.ko || exit 1
+lctl modules > $TMP/ogdb-`hostname`
+echo "NOW reload debugging syms.."
+
+RC=0
+lctl <<EOT || RC=2
+newdev
+attach quotfmt_test quotfmt_name quotfmt_uuid
+setup $MDS
+EOT
+
+# Using ignore_errors will allow lctl to cleanup even if the test fails.
+lctl <<EOC
+cfg_device quotfmt_name
+ignore_errors
+cleanup
+detach
+EOC
+rmmod quotafmt_test || RC2=3
+[ $RC -eq 0 -a "$RC2" ] && RC=$RC2
+
+exit $RC
index cccb397..35b123f 100644 (file)
 #include <getopt.h>
 #include <string.h>
 #include <mntent.h>
+#include <errno.h>
+#include <pwd.h>
+#include <grp.h>
+
 #include <portals/api-support.h>
 #include <portals/ptlctl.h>
 
 #include "parser.h"
 #include "obdctl.h"
 
+/* FIXME: the Q_SYNC ... commands defined in linux/quota.h seem to be broken,
+ *        so define new commands using the values from the kernel */
+#define LUSTRE_Q_QUOTAON  0x800002     /* turn quotas on */
+#define LUSTRE_Q_QUOTAOFF 0x800003     /* turn quotas off */
+#define LUSTRE_Q_GETINFO  0x800005     /* get information about quota files */
+#define LUSTRE_Q_SETINFO  0x800006     /* set information about quota files */
+#define LUSTRE_Q_GETQUOTA 0x800007     /* get user quota structure */
+#define LUSTRE_Q_SETQUOTA 0x800008     /* set user quota structure */
+
 unsigned int portal_subsystem_debug = 0;
 
 /* all functions */
@@ -49,6 +62,12 @@ static int lfs_getstripe(int argc, char **argv);
 static int lfs_osts(int argc, char **argv);
 static int lfs_check(int argc, char **argv);
 static int lfs_catinfo(int argc, char **argv);
+static int lfs_quotachog(int argc, char **argv);
+static int lfs_quotacheck(int argc, char **argv);
+static int lfs_quotaon(int argc, char **argv);
+static int lfs_quotaoff(int argc, char **argv);
+static int lfs_setquota(int argc, char **argv);
+static int lfs_quota(int argc, char **argv);
 
 /* all avaialable commands */
 command_t cmdlist[] = {
@@ -79,6 +98,24 @@ command_t cmdlist[] = {
          "\tkeywords are one of followings: config, deletions.\n"
          "\tnode name must be provided when use keyword config."},
         {"osts", lfs_osts, 0, "osts"},
+        {"quotachog",lfs_quotachog, 0,
+         "Change all files owner or group in specified filesystem.\n"
+         "usage: quotachog [-i] <filesystem>\n"
+         "\t-i: ignore error if file is not exist\n"},
+        {"quotacheck", lfs_quotacheck, 0,
+         "Scan the specified filesystem for disk usage, and create,\n"
+         "or update quota files.\n"
+         "usage: quotacheck [ -ug ] <filesystem>"},
+        {"quotaon", lfs_quotaon, 0, "Turn filesystem quotas on.\n"
+         "usage: quotaon [ -ugf ] <filesystem>"},
+        {"quotaoff", lfs_quotaoff, 0, "Turn filesystem quotas off.\n"
+         "usage: quotaoff [ -ug ] <filesystem>"},
+        {"setquota", lfs_setquota, 0, "Set filesystem quotas.\n"
+         "usage: setquota [ -u | -g ] <name> <block-softlimit> <block-hardlimit> <inode-softlimit> <inode-hardlimit> <filesystem>\n"
+         "       setquota -t [ -u | -g ] <block-grace> <inode-grace> <filesystem>"},
+        {"quota", lfs_quota, 0, "Display disk usage and limits.\n"
+         "usage: quota -t [ -u |-g ] <filesystem>\n"
+         "       quota [ -o obd_uuid ] [ -u | -g ] [name] <filesystem>"},
         {"help", Parser_help, 0, "help"},
         {"exit", Parser_quit, 0, "quit"},
         {"quit", Parser_quit, 0, "quit"},
@@ -386,6 +423,531 @@ static int lfs_catinfo(int argc, char **argv)
         return rc;
 }
 
+
+/*
+ * "lfs quotachog [-i] <filesystem>"
+ *
+ * Parses the optional -i flag and hands the first non-option argument to
+ * llapi_quotachog().  Returns CMD_HELP on a usage error, otherwise the
+ * return value of llapi_quotachog().
+ */
+static int lfs_quotachog(int argc, char **argv)
+{
+        int c, rc;
+        int flag = 0;
+
+        /* Restart option parsing from scratch, as the other quota commands
+         * in this file do; an earlier getopt() user in the same process
+         * may have left optind advanced. */
+        optind = 0;
+        while ((c = getopt(argc, argv, "i")) != -1) {
+                switch (c) {
+                case 'i':
+                        flag++;
+                        break;
+                default:
+                        /* getopt() returned '?'; the rejected character
+                         * itself is in optopt. */
+                        fprintf(stderr, "error: %s: option '-%c' unrecognized\n",
+                                argv[0], optopt);
+                        return CMD_HELP;
+                }
+        }
+        if (optind == argc)
+                return CMD_HELP;
+        rc = llapi_quotachog(argv[optind], flag);
+        if (rc)
+                fprintf(stderr, "error: change file owner/group failed.\n");
+        return rc;
+}
+
+
+/*
+ * "lfs quotacheck [-ug] <filesystem>"
+ *
+ * Sequence: issue LUSTRE_Q_QUOTAOFF, start a quota check, poll until the
+ * check finishes, then issue LUSTRE_Q_QUOTAON.  -u and -g build a bitmask
+ * (1, 2 or 3) that is decremented to 0, 1 or 2 before being used as the
+ * quota type; with neither flag the type stays 0.
+ *
+ * Returns CMD_HELP on a usage error, the failing call's return value on
+ * error, and 0 on success.
+ */
+static int lfs_quotacheck(int argc, char **argv)
+{
+        int c, check_type = 0;
+        char *mnt;
+        struct if_quotacheck qchk;
+        struct if_quotactl qctl;
+        char *obd_type = qchk.obd_type;
+        char *obd_uuid = qchk.obd_uuid.uuid;
+        int rc;
+
+        memset(&qchk, 0, sizeof(qchk));
+
+        optind = 0;
+        while ((c = getopt(argc, argv, "ug")) != -1) {
+                switch (c) {
+                case 'u':
+                        check_type |= 0x01;
+                        break;
+                case 'g':
+                        check_type |= 0x02;
+                        break;
+                default:
+                        /* getopt() returned '?'; the rejected character
+                         * itself is in optopt. */
+                        fprintf(stderr, "error: %s: option '-%c' unrecognized\n",
+                                argv[0], optopt);
+                        return CMD_HELP;
+                }
+        }
+
+        if (check_type)
+                check_type--;
+
+        if (argc == optind)
+                return CMD_HELP;
+
+        mnt = argv[optind];
+
+        /* Turn quotas off before checking.  The result is not examined;
+         * NOTE(review): presumably best-effort because quotas may already
+         * be off - confirm. */
+        memset(&qctl, 0, sizeof(qctl));
+        qctl.qc_cmd = LUSTRE_Q_QUOTAOFF;
+        qctl.qc_id = QFMT_LDISKFS;
+        qctl.qc_type = check_type;
+        llapi_quotactl(mnt, &qctl);
+
+        rc = llapi_quotacheck(mnt, check_type);
+        if (rc) {
+                fprintf(stderr, "quotacheck failed: %s\n", strerror(errno));
+                return rc;
+        }
+
+        rc = llapi_poll_quotacheck(mnt, &qchk);
+        if (rc) {
+                if (*obd_type)
+                        fprintf(stderr, "%s %s ", obd_type, obd_uuid);
+                fprintf(stderr, "quota check failed: %s\n", strerror(errno));
+                return rc;
+        }
+
+        memset(&qctl, 0, sizeof(qctl));
+        qctl.qc_cmd = LUSTRE_Q_QUOTAON;
+        qctl.qc_id = QFMT_LDISKFS;
+        qctl.qc_type = check_type;
+        rc = llapi_quotactl(mnt, &qctl);
+        if (rc) {
+                /* Test the same structure that is printed: the quotactl
+                 * request, not the earlier quotacheck poll result. */
+                if (*qctl.obd_type)
+                        fprintf(stderr, "%s %s ",
+                                qctl.obd_type, qctl.obd_uuid.uuid);
+                fprintf(stderr, "%s turn on quota failed: %s\n",
+                        argv[0], strerror(errno));
+                return rc;
+        }
+
+        return 0;
+}
+
+/*
+ * "lfs quotaon [-ugf] <filesystem>"
+ *
+ * Sends a LUSTRE_Q_QUOTAON request (LUSTRE_Q_QUOTAOFF when -f is given)
+ * for the mount point named by the first non-option argument.  -u and -g
+ * build a bitmask (1, 2 or 3) that is decremented to 0, 1 or 2 before it
+ * is sent as qc_type; with neither flag qc_type stays 0.
+ *
+ * Returns CMD_HELP on a usage error, the llapi_quotactl() return value on
+ * failure, and 0 on success.
+ */
+static int lfs_quotaon(int argc, char **argv)
+{
+        int c;
+        char *mnt;
+        struct if_quotactl qctl;
+        char *obd_type = qctl.obd_type;
+        char *obd_uuid = qctl.obd_uuid.uuid;
+        int rc;
+
+        memset(&qctl, 0, sizeof(qctl));
+        qctl.qc_cmd = LUSTRE_Q_QUOTAON;
+        qctl.qc_id = QFMT_LDISKFS;
+
+        optind = 0;
+        while ((c = getopt(argc, argv, "ugf")) != -1) {
+                switch (c) {
+                case 'u':
+                        qctl.qc_type |= 0x01;
+                        break;
+                case 'g':
+                        qctl.qc_type |= 0x02;
+                        break;
+                case 'f':
+                        qctl.qc_cmd = LUSTRE_Q_QUOTAOFF;
+                        break;
+                default:
+                        /* getopt() returned '?'; the rejected character
+                         * itself is in optopt. */
+                        fprintf(stderr, "error: %s: option '-%c' unrecognized\n",
+                                argv[0], optopt);
+                        return CMD_HELP;
+                }
+        }
+
+        if (qctl.qc_type)
+                qctl.qc_type--;
+
+        if (argc == optind)
+                return CMD_HELP;
+
+        mnt = argv[optind];
+
+        rc = llapi_quotactl(mnt, &qctl);
+        if (rc) {
+                /* obd_type is only non-empty if the request structure was
+                 * filled in with the name of an OBD. */
+                if (*obd_type)
+                        fprintf(stderr, "%s %s ", obd_type, obd_uuid);
+                fprintf(stderr, "%s failed: %s\n", argv[0], strerror(errno));
+                return rc;
+        }
+
+        return 0;
+}
+
+/*
+ * "lfs quotaoff [-ug] <filesystem>"
+ *
+ * Sends a LUSTRE_Q_QUOTAOFF request for the mount point named by the
+ * first non-option argument.  -u and -g build a bitmask (1, 2 or 3) that
+ * is decremented to 0, 1 or 2 before it is sent as qc_type; with neither
+ * flag qc_type stays 0.
+ *
+ * Returns CMD_HELP on a usage error, the llapi_quotactl() return value on
+ * failure, and 0 on success.
+ */
+static int lfs_quotaoff(int argc, char **argv)
+{
+        int c;
+        char *mnt;
+        struct if_quotactl qctl;
+        char *obd_type = qctl.obd_type;
+        char *obd_uuid = qctl.obd_uuid.uuid;
+        int rc;
+
+        memset(&qctl, 0, sizeof(qctl));
+        qctl.qc_cmd = LUSTRE_Q_QUOTAOFF;
+
+        optind = 0;
+        while ((c = getopt(argc, argv, "ug")) != -1) {
+                switch (c) {
+                case 'u':
+                        qctl.qc_type |= 0x01;
+                        break;
+                case 'g':
+                        qctl.qc_type |= 0x02;
+                        break;
+                default:
+                        /* getopt() returned '?'; the rejected character
+                         * itself is in optopt. */
+                        fprintf(stderr, "error: %s: option '-%c' unrecognized\n",
+                                argv[0], optopt);
+                        return CMD_HELP;
+                }
+        }
+
+        if (qctl.qc_type)
+                qctl.qc_type--;
+
+        if (argc == optind)
+                return CMD_HELP;
+
+        mnt = argv[optind];
+
+        rc = llapi_quotactl(mnt, &qctl);
+        if (rc) {
+                if (*obd_type)
+                        fprintf(stderr, "%s %s ", obd_type, obd_uuid);
+                fprintf(stderr, "quotaoff failed: %s\n", strerror(errno));
+                return rc;
+        }
+
+        return 0;
+}
+
+/*
+ * Look up the numeric id of a user (type == USRQUOTA) or, for any other
+ * type, of a group, by name.
+ *
+ * On success stores the id in *id and returns 0.  On failure returns -1
+ * with errno set; if the lookup itself reported no error, errno is set to
+ * ENOENT.  *id is left untouched on failure.
+ */
+static int name2id(unsigned int *id, char *name, int type)
+{
+        if (type == USRQUOTA) {
+                struct passwd *entry;
+
+                /* A NULL result with errno still 0 means "no such entry",
+                 * so clear any stale value first to tell the cases apart. */
+                errno = 0;
+                if (!(entry = getpwnam(name))) {
+                        if (!errno)
+                                errno = ENOENT;
+                        return -1;
+                }
+
+                *id = entry->pw_uid;
+        } else {
+                struct group *entry;
+
+                errno = 0;
+                if (!(entry = getgrnam(name))) {
+                        if (!errno)
+                                errno = ENOENT;
+                        return -1;
+                }
+
+                *id = entry->gr_gid;
+        }
+
+        return 0;
+}
+
+/*
+ * Look up the name of a user (type == USRQUOTA) or, for any other type,
+ * of a group, by numeric id.
+ *
+ * On success stores a pointer to the name in *name and returns 0.  The
+ * pointer refers to the entry returned by getpwuid()/getgrgid(), so it
+ * must not be freed and may be overwritten by a later lookup.  On failure
+ * returns -1 with errno set (ENOENT if the lookup reported no error) and
+ * leaves *name untouched.
+ */
+static int id2name(char **name, unsigned int id, int type)
+{
+        if (type == USRQUOTA) {
+                struct passwd *entry;
+
+                /* A NULL result with errno still 0 means "no such entry",
+                 * so clear any stale value first to tell the cases apart. */
+                errno = 0;
+                if (!(entry = getpwuid(id))) {
+                        if (!errno)
+                                errno = ENOENT;
+                        return -1;
+                }
+
+                *name = entry->pw_name;
+        } else {
+                struct group *entry;
+
+                errno = 0;
+                if (!(entry = getgrgid(id))) {
+                        if (!errno)
+                                errno = ENOENT;
+                        return -1;
+                }
+
+                *name = entry->gr_name;
+        }
+
+        return 0;
+}
+
+/*
+ * Convert the string `str` with strtol() (base auto-detected) and store
+ * the result in `nr`.  If the string is empty or has trailing characters
+ * that are not part of the number, print "error: bad <msg>: <str>" to
+ * stderr and return CMD_HELP from the enclosing function.
+ *
+ * `str` is evaluated exactly once, so callers may pass an expression with
+ * side effects such as argv[optind++].
+ */
+#define ARG2INT(nr, str, msg)                                           \
+do {                                                                    \
+        const char *arg2int_s = (str);                                  \
+        char *endp;                                                     \
+        nr = strtol(arg2int_s, &endp, 0);                               \
+        if (endp == arg2int_s || *endp) {                               \
+                fprintf(stderr, "error: bad %s: %s\n", msg, arg2int_s); \
+                return CMD_HELP;                                        \
+        }                                                               \
+} while (0)
+
+/*
+ * "lfs setquota [-u|-g] <name> <block-softlimit> <block-hardlimit>
+ *               <inode-softlimit> <inode-hardlimit> <filesystem>"
+ * "lfs setquota -t [-u|-g] <block-grace> <inode-grace> <filesystem>"
+ *
+ * Without -t a LUSTRE_Q_SETQUOTA request carrying the four limits is
+ * sent for the id that <name> resolves to; with -t a LUSTRE_Q_SETINFO
+ * request carrying the two grace values is sent.  -u and -g build a
+ * bitmask that is decremented before use; giving both yields UGQUOTA,
+ * which is rejected.
+ *
+ * Returns CMD_HELP on a usage error, the llapi_quotactl() return value on
+ * failure, and 0 on success.
+ */
+static int lfs_setquota(int argc, char **argv)
+{
+        int c;
+        char *mnt;
+        struct if_quotactl qctl;
+        char *obd_type = qctl.obd_type;
+        char *obd_uuid = qctl.obd_uuid.uuid;
+        int rc;
+
+        memset(&qctl, 0, sizeof(qctl));
+        qctl.qc_cmd = LUSTRE_Q_SETQUOTA;
+
+        optind = 0;
+        while ((c = getopt(argc, argv, "ugt")) != -1) {
+                switch (c) {
+                case 'u':
+                        qctl.qc_type |= 0x01;
+                        break;
+                case 'g':
+                        qctl.qc_type |= 0x02;
+                        break;
+                case 't':
+                        qctl.qc_cmd = LUSTRE_Q_SETINFO;
+                        break;
+                default:
+                        /* getopt() returned '?'; the rejected character
+                         * itself is in optopt. */
+                        fprintf(stderr, "error: %s: option '-%c' unrecognized\n",
+                                argv[0], optopt);
+                        return CMD_HELP;
+                }
+        }
+
+        if (qctl.qc_type)
+                qctl.qc_type--;
+
+        if (qctl.qc_type == UGQUOTA) {
+                fprintf(stderr, "error: user and group quotas can't be set together\n");
+                return CMD_HELP;
+        }
+
+        /* ARG2INT is given a side-effect-free argument and optind is
+         * advanced separately, so a bad value is always reported with the
+         * argument that was actually parsed. */
+        if (qctl.qc_cmd == LUSTRE_Q_SETQUOTA) {
+                struct if_dqblk *dqb = &qctl.qc_dqblk;
+
+                /* name, four limits and the mount point */
+                if (optind + 6 != argc)
+                        return CMD_HELP;
+
+                rc = name2id(&qctl.qc_id, argv[optind], qctl.qc_type);
+                if (rc) {
+                        fprintf(stderr, "error: find id for name %s failed: %s\n",
+                                argv[optind], strerror(errno));
+                        return CMD_HELP;
+                }
+                optind++;
+
+                ARG2INT(dqb->dqb_bsoftlimit, argv[optind], "block-softlimit");
+                optind++;
+                ARG2INT(dqb->dqb_bhardlimit, argv[optind], "block-hardlimit");
+                optind++;
+                ARG2INT(dqb->dqb_isoftlimit, argv[optind], "inode-softlimit");
+                optind++;
+                ARG2INT(dqb->dqb_ihardlimit, argv[optind], "inode-hardlimit");
+                optind++;
+        } else {
+                struct if_dqinfo *dqi = &qctl.qc_dqinfo;
+
+                /* two grace values and the mount point */
+                if (optind + 3 != argc)
+                        return CMD_HELP;
+
+                ARG2INT(dqi->dqi_bgrace, argv[optind], "block-grace");
+                optind++;
+                ARG2INT(dqi->dqi_igrace, argv[optind], "inode-grace");
+                optind++;
+        }
+
+        mnt = argv[optind];
+
+        rc = llapi_quotactl(mnt, &qctl);
+        if (rc) {
+                if (*obd_type)
+                        fprintf(stderr, "%s %s ", obd_type, obd_uuid);
+                fprintf(stderr, "setquota failed: %s\n", strerror(errno));
+                return rc;
+        }
+
+        return 0;
+}
+
+/* Map a quota type to a printable word: "user", "group" or "unknown". */
+static inline char *type2name(int check_type)
+{
+        switch (check_type) {
+        case USRQUOTA:
+                return "user";
+        case GRPQUOTA:
+                return "group";
+        default:
+                return "unknown";
+        }
+}
+
+
+/*
+ * Format a duration given in seconds into buf, which must hold at least
+ * 40 bytes.  The value is rounded to the nearest minute; two days or more
+ * are printed as "<n>days", anything shorter as "HH:MM".
+ */
+static void grace2str(time_t seconds,char *buf)
+{
+        uint minutes, hours, days;
+
+        minutes = (seconds + 30) / 60;
+        hours = minutes / 60;
+        minutes %= 60;
+        days = hours / 24;
+        hours %= 24;
+        /* the operands are unsigned, so use %u rather than %d */
+        if (days >= 2)
+                snprintf(buf, 40, "%udays", days);
+        else
+                snprintf(buf, 40, "%02u:%02u", hours + days * 24, minutes);
+}
+
+
+/*
+ * Describe the time left until the deadline `seconds`, as seen from `now`.
+ * buf becomes empty when no deadline is set (seconds == 0), "none" when
+ * the deadline is not in the future, and otherwise the remaining time as
+ * formatted by grace2str().
+ */
+static void diff2str(time_t seconds, char *buf, time_t now)
+{
+        *buf = '\0';
+
+        if (seconds == 0)
+                return;
+
+        if (seconds > now) {
+                grace2str(seconds - now, buf);
+                return;
+        }
+
+        strcpy(buf, "none");
+}
+
+
+/*
+ * Print the result of a quota query to stdout.
+ *
+ * For LUSTRE_Q_GETQUOTA / Q_GETOQUOTA: a header naming the user or group,
+ * a column header, and one line with block and inode usage, soft limit
+ * ("quota"), hard limit ("limit") and remaining grace time.  A '*' after
+ * the usage marks a limit that is exceeded.
+ * For LUSTRE_Q_GETINFO / Q_GETOINFO: the block and inode grace periods.
+ * Any other qc_cmd prints nothing.
+ *
+ * mnt  - mount point, printed in the "Filesystem" column
+ * name - user or group name for the header line
+ * qctl - request structure holding the values to print
+ */
+static void print_quota(char *mnt, char *name, struct if_quotactl *qctl)
+{
+        time_t now;
+
+        time(&now);
+
+        if (qctl->qc_cmd == LUSTRE_Q_GETQUOTA || qctl->qc_cmd == Q_GETOQUOTA) {
+                /* 0: within limits, 1: over the hard limit,
+                 * 2: over the soft limit with the timer still in the future,
+                 * 3: over the soft limit with the timer expired */
+                int bover = 0, iover = 0;
+                struct if_dqblk *dqb = &qctl->qc_dqblk;
+
+                if (dqb->dqb_bhardlimit &&
+                    toqb(dqb->dqb_curspace) > dqb->dqb_bhardlimit) {
+                        bover = 1;
+                } else if (dqb->dqb_bsoftlimit &&
+                           toqb(dqb->dqb_curspace) > dqb->dqb_bsoftlimit) {
+                        if (dqb->dqb_btime > now) {
+                                bover = 2;
+                        } else {
+                                bover = 3;
+                        }
+                }
+
+                if (dqb->dqb_ihardlimit &&
+                    dqb->dqb_curinodes > dqb->dqb_ihardlimit) {
+                        iover = 1;
+                } else if (dqb->dqb_isoftlimit &&
+                           dqb->dqb_curinodes > dqb->dqb_isoftlimit) {
+                        /* the inode state is judged by the inode timer,
+                         * matching the dqb_itime value printed below */
+                        if (dqb->dqb_itime > now) {
+                                iover = 2;
+                        } else {
+                                iover = 3;
+                        }
+                }
+
+                printf("Disk quotas for %s %s (%cid %u):\n",
+                        type2name(qctl->qc_type), name,
+                        *type2name(qctl->qc_type), qctl->qc_id);
+                printf("%15s%8s %7s%8s%8s%8s %7s%8s%8s\n",
+                        "Filesystem",
+                        "blocks", "quota", "limit", "grace",
+                        "files", "quota", "limit", "grace");
+
+#if 0           /* XXX: always print quotas even when no usages */
+                if (dqb->dqb_curspace || dqb->dqb_curinodes)
+#endif
+                {
+                        char numbuf[3][32];
+                        char timebuf[40];
+
+                        /* a long mount point gets a line of its own so the
+                         * columns stay aligned */
+                        if (strlen(mnt) > 15)
+                                printf("%s\n%15s", mnt, "");
+                        else
+                                printf("%15s", mnt);
+                        /* timebuf is only read when bover > 1, and it is
+                         * always filled in when bover is non-zero */
+                        if (bover)
+                                diff2str(dqb->dqb_btime, timebuf, now);
+                        sprintf(numbuf[0], "%llu", toqb(dqb->dqb_curspace));
+                        sprintf(numbuf[1], "%llu", dqb->dqb_bsoftlimit);
+                        sprintf(numbuf[2], "%llu", dqb->dqb_bhardlimit);
+                        printf(" %7s%c %6s %7s %7s", numbuf[0], bover ? '*' : ' ', numbuf[1],
+                               numbuf[2], bover > 1 ? timebuf : "");
+                        if (iover)
+                                diff2str(dqb->dqb_itime, timebuf, now);
+                        sprintf(numbuf[0], "%llu", dqb->dqb_curinodes);
+                        sprintf(numbuf[1], "%llu", dqb->dqb_isoftlimit);
+                        sprintf(numbuf[2], "%llu", dqb->dqb_ihardlimit);
+                        printf(" %7s%c %6s %7s %7s\n", numbuf[0], iover ? '*' : ' ', numbuf[1],
+                               numbuf[2], iover > 1 ? timebuf : "");
+                }
+        } else if (qctl->qc_cmd == LUSTRE_Q_GETINFO || qctl->qc_cmd == Q_GETOINFO) {
+                char bgtimebuf[40];
+                char igtimebuf[40];
+
+                grace2str(qctl->qc_dqinfo.dqi_bgrace, bgtimebuf);
+                grace2str(qctl->qc_dqinfo.dqi_igrace, igtimebuf);
+                printf("Block grace time: %s; Inode grace time: %s\n", bgtimebuf, igtimebuf);
+        }
+}
+
+/*
+ * "lfs quota -t [-u|-g] <filesystem>"
+ * "lfs quota [-o obd_uuid] [-u|-g] [name] <filesystem>"
+ *
+ * Sends a LUSTRE_Q_GETQUOTA request (LUSTRE_Q_GETINFO with -t) and prints
+ * the answer with print_quota().  -u and -g build a bitmask that is
+ * decremented before use; giving both yields UGQUOTA, which is rejected.
+ * When a name is given it is resolved to qc_id; otherwise qc_id stays 0
+ * and the header is labelled with the name looked up for getuid().
+ * NOTE(review): getuid() is used for that label even when -g selects
+ * group quotas - confirm this is intended.
+ *
+ * Returns CMD_HELP on a usage error, the llapi_quotactl() return value on
+ * failure, and 0 on success.
+ */
+static int lfs_quota(int argc, char **argv)
+{
+        int c;
+        char *name = NULL, *mnt;
+        struct if_quotactl qctl;
+        char *obd_type = qctl.obd_type;
+        char *obd_uuid = qctl.obd_uuid.uuid;
+        int rc;
+
+        memset(&qctl, 0, sizeof(qctl));
+        qctl.qc_cmd = LUSTRE_Q_GETQUOTA;
+
+        optind = 0;
+        while ((c = getopt(argc, argv, "ugto:")) != -1) {
+                switch (c) {
+                case 'u':
+                        qctl.qc_type |= 0x01;
+                        break;
+                case 'g':
+                        qctl.qc_type |= 0x02;
+                        break;
+                case 't':
+                        qctl.qc_cmd = LUSTRE_Q_GETINFO;
+                        break;
+                case 'o':
+                        /* qctl was zeroed above; copying one byte less
+                         * than the field keeps the string terminated even
+                         * when optarg is too long. */
+                        strncpy(obd_uuid, optarg, sizeof(qctl.obd_uuid) - 1);
+                        break;
+                default:
+                        /* getopt() returned '?'; the rejected character
+                         * itself is in optopt. */
+                        fprintf(stderr, "error: %s: option '-%c' unrecognized\n",
+                                argv[0], optopt);
+                        return CMD_HELP;
+                }
+        }
+
+        if (qctl.qc_type)
+                qctl.qc_type--;
+
+        if (qctl.qc_type == UGQUOTA) {
+                fprintf(stderr, "error: user or group can't be specified together\n");
+                return CMD_HELP;
+        }
+
+        if (qctl.qc_cmd == LUSTRE_Q_GETQUOTA && optind + 2 == argc) {
+                name = argv[optind++];
+                rc = name2id(&qctl.qc_id, name, qctl.qc_type);
+                if (rc) {
+                        fprintf(stderr, "error: find id for name %s failed: %s\n",
+                                name, strerror(errno));
+                        return CMD_HELP;
+                }
+        } else if (optind + 1 != argc) {
+                return CMD_HELP;
+        }
+
+        mnt = argv[optind];
+
+        rc = llapi_quotactl(mnt, &qctl);
+        if (rc) {
+                if (*obd_type)
+                        fprintf(stderr, "%s %s ", obd_type, obd_uuid);
+                fprintf(stderr, "quota failed: %s\n", strerror(errno));
+                return rc;
+        }
+
+        if (!name) {
+                /* The lookup can fail (no passwd/group entry for the id);
+                 * never hand a NULL name to print_quota()'s "%s". */
+                rc = id2name(&name, getuid(), qctl.qc_type);
+                if (rc || !name)
+                        name = "unknown";
+        }
+
+        print_quota(mnt, name, &qctl);
+        return 0;
+}
+
 int main(int argc, char **argv)
 {
         int rc;
index 9684127..f345582 100644 (file)
@@ -39,6 +39,7 @@
 #include <stdarg.h>
 #include <sys/stat.h>
 #include <sys/types.h>
+#include <sys/syscall.h>
 #include <linux/types.h>
 #include <linux/unistd.h>
 
@@ -165,9 +166,12 @@ struct find_param {
         int     quiet;
         struct  obd_uuid        *obduuid;
         int     lumlen;
-        struct  lov_user_md     *lum;
+        struct  lov_user_mds_data *lmd;
+/*        struct  lov_user_md     *lum;*/
         int     got_uuids;
         int     obdindex;
+        int     (* process_file)(DIR *dir, char *dname, char *fname,
+                        struct find_param *param);
 };
 
 /* XXX Max obds per lov currently hardcoded to 1000 in lov/lov_obd.c */
@@ -177,9 +181,9 @@ struct find_param {
 static int prepare_find(struct find_param *param)
 {
         param->lumlen = lov_mds_md_size(MAX_LOV_UUID_COUNT);
-        if ((param->lum = malloc(param->lumlen)) == NULL) {
+        if ((param->lmd = malloc(sizeof(lstat_t) + param->lumlen)) == NULL) {
                 err_msg("unable to allocate %d bytes of memory for ioctl",
-                        param->lumlen);
+                        sizeof(lstat_t) + param->lumlen);
                 return ENOMEM;
         }
 
@@ -193,8 +197,8 @@ static void cleanup_find(struct find_param *param)
 {
         if (param->obduuid)
                 free(param->obduuid);
-        if (param->lum)
-                free(param->lum);
+        if (param->lmd)
+                free(param->lmd);
 }
 
 int llapi_lov_get_uuids(int fd, struct obd_uuid *uuidp, int *ost_count)
@@ -349,15 +353,15 @@ void lov_dump_user_lmm_v1(struct lov_user_md_v1 *lum, char *dname, char *fname,
 
 void llapi_lov_dump_user_lmm(struct find_param *param, char *dname, char *fname)
 {
-        switch(*(__u32 *)param->lum) { /* lum->lmm_magic */
+        switch(*(__u32 *)&param->lmd->lmd_lmm) { /* lum->lmm_magic */
         case LOV_USER_MAGIC_V1:
-                lov_dump_user_lmm_v1(param->lum, dname, fname, param->obdindex,
+                lov_dump_user_lmm_v1(&param->lmd->lmd_lmm, dname, fname, param->obdindex,
                                      param->quiet, param->verbose,
                                      (param->verbose || !param->obduuid));
                 break;
         default:
                 printf("unknown lmm_magic:  %#x (expecting %#x)\n",
-                       *(__u32 *)param->lum, LOV_USER_MAGIC_V1);
+                       *(__u32 *)&param->lmd->lmd_lmm, LOV_USER_MAGIC_V1);
                 return;
         }
 }
@@ -411,14 +415,14 @@ int op_get_file_stripe(char *path, struct lov_user_md *lum)
         return llapi_file_get_stripe(path, lum);
 }
 
-static int process_file(DIR *dir, char *dname, char *fname,
+static int find_process_file(DIR *dir, char *dname, char *fname,
                         struct find_param *param)
 {
         int rc;
 
-        strncpy((char *)param->lum, fname, param->lumlen);
+        strncpy((char *)&param->lmd->lmd_lmm, fname, param->lumlen);
 
-        rc = ioctl(dirfd(dir), IOC_MDC_GETSTRIPE, (void *)param->lum);
+        rc = ioctl(dirfd(dir), IOC_MDC_GETSTRIPE, (void *)&param->lmd->lmd_lmm);
         if (rc) {
                 if (errno == ENODATA) {
                         if (!param->obduuid && !param->quiet)
@@ -478,8 +482,8 @@ static int process_dir(DIR *dir, char *dname, struct find_param *param)
         }
 
         /* retrieve dir's stripe info */
-        strncpy((char *)param->lum, dname, param->lumlen);
-        rc = ioctl(dirfd(dir), LL_IOC_LOV_GETSTRIPE, (void *)param->lum);
+        strncpy((char *)&param->lmd->lmd_lmm, dname, param->lumlen);
+        rc = ioctl(dirfd(dir), LL_IOC_LOV_GETSTRIPE, (void *)&param->lmd->lmd_lmm);
         if (rc) {
                 if (errno == ENODATA) {
                         if (!param->obduuid && param->verbose)
@@ -528,7 +532,7 @@ static int process_dir(DIR *dir, char *dname, struct find_param *param)
                                 return rc;
                         break;
                 case DT_REG:
-                        rc = process_file(dir, dname, dirp->d_name, param);
+                        rc = param->process_file(dir, dname, dirp->d_name, param);
                         if (rc)
                                 return rc;
                         break;
@@ -580,7 +584,7 @@ static int process_path(char *path, struct find_param *param)
                         if (!param->got_uuids)
                                 rc = setup_obd_uuids(dir, dname, param);
                         if (rc == 0)
-                                rc = process_file(dir, dname, fname, param);
+                                rc = param->process_file(dir, dname, fname, param);
                         closedir(dir);
                 }
         }
@@ -598,6 +602,7 @@ int llapi_find(char *path, struct obd_uuid *obduuid, int recursive,
         param.recursive = recursive;
         param.verbose = verbose;
         param.quiet = quiet;
+        param.process_file = find_process_file;
         if (obduuid) {
                 param.obduuid = malloc(sizeof(*obduuid));
                 if (param.obduuid == NULL) {
@@ -742,3 +747,117 @@ int llapi_is_lustre_mnttype(char *type)
 {
         return (strcmp(type,"lustre") == 0 || strcmp(type,"lustre_lite") == 0);
 }
+
+/*
+ * Issue the LL_IOC_QUOTACHECK ioctl on the directory `mnt`, passing
+ * `check_type` as its argument.
+ *
+ * Returns -1 if the directory cannot be opened, otherwise the ioctl's
+ * return value.
+ */
+int llapi_quotacheck(char *mnt, int check_type)
+{
+        DIR *dir = opendir(mnt);
+        int result;
+
+        if (dir == NULL) {
+                err_msg("open %s failed", mnt);
+                return -1;
+        }
+
+        result = ioctl(dirfd(dir), LL_IOC_QUOTACHECK, check_type);
+        closedir(dir);
+
+        return result;
+}
+
+/*
+ * Repeat the LL_IOC_POLL_QUOTACHECK ioctl on the directory `mnt` until it
+ * either succeeds or fails with an errno other than ENODATA.  Between
+ * attempts the caller sleeps, starting at 2 seconds and doubling the
+ * interval while it is below 30 (2, 4, 8, 16, 32, 32, ...).
+ *
+ * Returns -1 if the directory cannot be opened, otherwise the return
+ * value of the last ioctl; `qchk` is passed through to the ioctl.
+ */
+int llapi_poll_quotacheck(char *mnt, struct if_quotacheck *qchk)
+{
+        DIR *dir = opendir(mnt);
+        int delay = 2;
+        int result;
+
+        if (dir == NULL) {
+                err_msg("open %s failed", mnt);
+                return -1;
+        }
+
+        for (;;) {
+                result = ioctl(dirfd(dir), LL_IOC_POLL_QUOTACHECK, qchk);
+                if (result == 0)
+                        break;
+                if (errno != ENODATA)
+                        break;
+
+                sleep(delay);
+                if (delay < 30)
+                        delay *= 2;
+        }
+
+        closedir(dir);
+        return result;
+}
+
+/*
+ * Issue the LL_IOC_QUOTACTL ioctl on the directory `mnt`, passing the
+ * request structure `qctl`.
+ *
+ * Returns -1 if the directory cannot be opened, otherwise the ioctl's
+ * return value.
+ */
+int llapi_quotactl(char *mnt, struct if_quotactl *qctl)
+{
+        DIR *dir = opendir(mnt);
+        int result;
+
+        if (dir == NULL) {
+                err_msg("open %s failed", mnt);
+                return -1;
+        }
+
+        result = ioctl(dirfd(dir), LL_IOC_QUOTACTL, qctl);
+        closedir(dir);
+
+        return result;
+}
+
+/*
+ * Per-file callback installed by llapi_quotachog(): fetch the file's
+ * attributes with the IOC_MDC_GETFILEINFO ioctl on the parent directory
+ * and chown "<dname>/<fname>" to the uid/gid found in them.
+ *
+ * dir   - open parent directory; the ioctl is issued on its descriptor
+ * dname - directory path, used to build the full path name
+ * fname - file name within the directory
+ * param - traversal state; param->lmd is the ioctl buffer
+ *
+ * Returns 0 on success or when the ioctl fails with ENODATA, errno when
+ * the ioctl fails with any error other than ENODATA or EISDIR, and
+ * otherwise the non-zero return value of the failing ioctl or chown.
+ */
+static int quotachog_process_file(DIR *dir, char *dname, char *fname,
+                        struct find_param *param)
+{
+        lstat_t *st;
+        char pathname[PATH_MAX + 1] = "";
+        int rc;
+
+        /* the file name is placed at the start of the buffer handed to the
+         * ioctl; NOTE(review): presumably the ioctl overwrites it with the
+         * reply - confirm */
+        strncpy((char *)param->lmd, fname, param->lumlen);
+
+        rc = ioctl(dirfd(dir), IOC_MDC_GETFILEINFO, (void *)param->lmd);
+        if (rc) {
+                if (errno == ENODATA) {
+                        /* not an error: report only when neither an OBD
+                         * filter nor quiet mode is set, then carry on */
+                        if (!param->obduuid && !param->quiet)
+                                fprintf(stderr,
+                                        "%s/%s has no stripe info\n",
+                                        dname, fname);
+                        rc = 0;
+                } else if (errno != EISDIR) {
+                        err_msg("IOC_MDC_GETFILEINFO ioctl failed");
+                        rc = errno;
+                }
+                /* for EISDIR no message is printed and rc is still the
+                 * ioctl's own return value */
+                return rc;
+        }
+
+        st = &param->lmd->lmd_st;
+        snprintf(pathname, sizeof(pathname), "%s/%s", dname, fname);
+        /* chown is invoked through syscall() rather than the libc wrapper */
+        rc = syscall(SYS_chown, pathname, st->st_uid, st->st_gid);
+        if (rc)
+                fprintf(stderr, "chown %s (%u,%u) fail: %s\n",
+                        pathname, st->st_uid, st->st_gid, strerror(errno));
+        return rc;
+}
+
+/*
+ * Walk `path` recursively and run quotachog_process_file() on every file
+ * that the traversal hands to param.process_file.
+ *
+ * path - file or directory to start from
+ * flag - currently unused.  NOTE(review): the lfs "-i" option is passed
+ *        in here but nothing acts on it yet.
+ *
+ * Returns the prepare_find() error if the traversal state cannot be set
+ * up, otherwise the result of process_path(), so that a failed traversal
+ * is reported to the caller instead of being dropped.
+ */
+int llapi_quotachog(char *path, int flag)
+{
+        struct find_param param;
+        int ret = 0;
+
+        /* zero first so cleanup_find() is safe even if prepare_find()
+         * fails before allocating anything */
+        memset(&param, 0, sizeof(param));
+        param.recursive = 1;
+        param.verbose = 0;
+        param.quiet = 1;
+        param.process_file = quotachog_process_file;
+
+        ret = prepare_find(&param);
+        if (ret)
+                goto out;
+
+        ret = process_path(path, &param);
+out:
+        cleanup_find(&param);
+        return ret;
+}
+
index cc53e12..a301783 100644 (file)
@@ -201,6 +201,38 @@ check_obd_ioobj(void)
 }
 
 void
+check_obd_quotactl(void)
+{
+        /* Emit the layout checks for the quota control structures
+         * obd_quotactl, obd_dqinfo and obd_dqblk: one CHECK_STRUCT per
+         * structure followed by one CHECK_MEMBER per field.
+         * NOTE(review): judging by the generated wiretest code, these
+         * macros produce sizeof/offsetof assertions - confirm against
+         * their definitions. */
+        BLANK_LINE();
+        CHECK_STRUCT(obd_quotactl);
+        CHECK_MEMBER(obd_quotactl, qc_cmd);
+        CHECK_MEMBER(obd_quotactl, qc_type);
+        CHECK_MEMBER(obd_quotactl, qc_id);
+        CHECK_MEMBER(obd_quotactl, qc_stat);
+        CHECK_MEMBER(obd_quotactl, qc_dqinfo);
+        CHECK_MEMBER(obd_quotactl, qc_dqblk);
+
+        /* grace-time information embedded in obd_quotactl as qc_dqinfo */
+        BLANK_LINE();
+        CHECK_STRUCT(obd_dqinfo);
+        CHECK_MEMBER(obd_dqinfo, dqi_bgrace);
+        CHECK_MEMBER(obd_dqinfo, dqi_igrace);
+        CHECK_MEMBER(obd_dqinfo, dqi_flags);
+        CHECK_MEMBER(obd_dqinfo, dqi_valid);
+
+        /* limits and usage embedded in obd_quotactl as qc_dqblk */
+        BLANK_LINE();
+        CHECK_STRUCT(obd_dqblk);
+        CHECK_MEMBER(obd_dqblk, dqb_bhardlimit);
+        CHECK_MEMBER(obd_dqblk, dqb_bsoftlimit);
+        CHECK_MEMBER(obd_dqblk, dqb_curspace);
+        CHECK_MEMBER(obd_dqblk, dqb_ihardlimit);
+        CHECK_MEMBER(obd_dqblk, dqb_isoftlimit);
+        CHECK_MEMBER(obd_dqblk, dqb_curinodes);
+        CHECK_MEMBER(obd_dqblk, dqb_btime);
+        CHECK_MEMBER(obd_dqblk, dqb_itime);
+        CHECK_MEMBER(obd_dqblk, dqb_valid);
+}
+
+void
 check_niobuf_remote(void)
 {
         BLANK_LINE();
@@ -522,6 +554,7 @@ check_llog_logid(void)
         CHECK_VALUE(OST_SZ_REC);
         CHECK_VALUE(OST_RAID1_REC);
         CHECK_VALUE(MDS_UNLINK_REC);
+        CHECK_VALUE(MDS_SETATTR_REC);
         CHECK_VALUE(OBD_CFG_REC);
         CHECK_VALUE(PTL_CFG_REC);
         CHECK_VALUE(LLOG_GEN_REC);
@@ -690,6 +723,17 @@ check_llogd_conn_body(void)
 }
 
 void
+check_qunit_data(void)
+{
+        /* Emit the layout checks for struct qunit_data: one CHECK_STRUCT
+         * for the structure followed by one CHECK_MEMBER per field.
+         * NOTE(review): presumably these produce sizeof/offsetof
+         * assertions like the other check_*() helpers - confirm. */
+        BLANK_LINE();
+        CHECK_STRUCT(qunit_data);
+        CHECK_MEMBER(qunit_data, qd_id);
+        CHECK_MEMBER(qunit_data, qd_type);
+        CHECK_MEMBER(qunit_data, qd_count);
+        CHECK_MEMBER(qunit_data, qd_isblk);
+}
+
+void
 system_string (char *cmdline, char *str, int len)
 {
         int   fds[2];
@@ -791,6 +835,8 @@ main(int argc, char **argv)
         CHECK_VALUE(OST_SAN_READ);
         CHECK_VALUE(OST_SAN_WRITE);
         CHECK_VALUE(OST_SYNC);
+        CHECK_VALUE(OST_QUOTACHECK);
+        CHECK_VALUE(OST_QUOTACTL);
         CHECK_VALUE(OST_LAST_OPC);
 
         CHECK_DEFINE(OBD_OBJECT_EOF);
@@ -811,6 +857,8 @@ main(int argc, char **argv)
         CHECK_VALUE(MDS_SYNC);
         CHECK_VALUE(MDS_DONE_WRITING);
         CHECK_VALUE(MDS_SET_INFO);
+        CHECK_VALUE(MDS_QUOTACHECK);
+        CHECK_VALUE(MDS_QUOTACTL);
         CHECK_VALUE(MDS_LAST_OPC);
 
         CHECK_VALUE(REINT_SETATTR);
@@ -861,8 +909,12 @@ main(int argc, char **argv)
 
         CHECK_VALUE(OBD_PING);
         CHECK_VALUE(OBD_LOG_CANCEL);
+        CHECK_VALUE(OBD_QC_CALLBACK);
         CHECK_VALUE(OBD_LAST_OPC);
 
+        CHECK_VALUE(QUOTA_DQACQ);
+        CHECK_VALUE(QUOTA_DQREL);
+
         COMMENT("Sizes and Offsets");
         BLANK_LINE();
         check_lustre_handle();
@@ -871,6 +923,7 @@ main(int argc, char **argv)
         check_lov_mds_md_v1();
         check_obd_statfs();
         check_obd_ioobj();
+        check_obd_quotactl();
         check_niobuf_remote();
         check_ost_body();
         check_ll_fid();
@@ -909,6 +962,7 @@ main(int argc, char **argv)
         check_llog_cookie();
         check_llogd_body();
         check_llogd_conn_body();
+        check_qunit_data();
 
         printf("}\n\n");
 
index b8ec996..4c20368 100644 (file)
@@ -25,8 +25,8 @@ int main()
 void lustre_assert_wire_constants(void)
 {
         /* Wire protocol assertions generated by 'wirecheck'
-         * running on Linux milano 2.6.5-7.141_87k.3-b1_4_bgl.200503212049bigsmp #1 SMP Mon Mar 21 20
-         * with gcc version 3.3.4 20040817 (Red Hat Linux 3.3.4-2) */
+         * running on Linux localhost.localdomain 2.4.20-8 #3 Thu Sep 23 15:12:02 CST 2004 i686 i686 i
+         * with gcc version 3.2.2 20030222 (Red Hat Linux 3.2.2-5) */
 
 
         /* Constants... */
@@ -86,7 +86,11 @@ void lustre_assert_wire_constants(void)
                  (long long)OST_SAN_WRITE);
         LASSERTF(OST_SYNC == 16, " found %lld\n",
                  (long long)OST_SYNC);
-        LASSERTF(OST_LAST_OPC == 18, " found %lld\n",
+        LASSERTF(OST_QUOTACHECK == 18, " found %lld\n",
+                 (long long)OST_QUOTACHECK);
+        LASSERTF(OST_QUOTACTL == 19, " found %lld\n",
+                 (long long)OST_QUOTACTL);
+        LASSERTF(OST_LAST_OPC == 20, " found %lld\n",
                  (long long)OST_LAST_OPC);
         LASSERTF(OBD_OBJECT_EOF == 0xffffffffffffffffULL," found %lld\n",
                  (long long)OBD_OBJECT_EOF);
@@ -120,7 +124,11 @@ void lustre_assert_wire_constants(void)
                  (long long)MDS_DONE_WRITING);
         LASSERTF(MDS_SET_INFO == 46, " found %lld\n",
                  (long long)MDS_SET_INFO);
-        LASSERTF(MDS_LAST_OPC == 47, " found %lld\n",
+        LASSERTF(MDS_QUOTACHECK == 47, " found %lld\n",
+                 (long long)MDS_QUOTACHECK);
+        LASSERTF(MDS_QUOTACTL == 48, " found %lld\n",
+                 (long long)MDS_QUOTACTL);
+        LASSERTF(MDS_LAST_OPC == 49, " found %lld\n",
                  (long long)MDS_LAST_OPC);
         LASSERTF(REINT_SETATTR == 1, " found %lld\n",
                  (long long)REINT_SETATTR);
@@ -202,8 +210,14 @@ void lustre_assert_wire_constants(void)
                  (long long)OBD_PING);
         LASSERTF(OBD_LOG_CANCEL == 401, " found %lld\n",
                  (long long)OBD_LOG_CANCEL);
-        LASSERTF(OBD_LAST_OPC == 402, " found %lld\n",
+        LASSERTF(OBD_QC_CALLBACK == 402, " found %lld\n",
+                 (long long)OBD_QC_CALLBACK);
+        LASSERTF(OBD_LAST_OPC == 403, " found %lld\n",
                  (long long)OBD_LAST_OPC);
+        LASSERTF(QUOTA_DQACQ == 601, " found %lld\n",
+                 (long long)QUOTA_DQACQ);
+        LASSERTF(QUOTA_DQREL == 602, " found %lld\n",
+                 (long long)QUOTA_DQREL);
         /* Sizes and Offsets */
 
 
@@ -517,6 +531,94 @@ void lustre_assert_wire_constants(void)
         LASSERTF((int)sizeof(((struct obd_ioobj *)0)->ioo_bufcnt) == 4, " found %lld\n",
                  (long long)(int)sizeof(((struct obd_ioobj *)0)->ioo_bufcnt));
 
+        /* Checks for struct obd_quotactl */
+        LASSERTF((int)sizeof(struct obd_quotactl) == 112, " found %lld\n",
+                 (long long)(int)sizeof(struct obd_quotactl));
+        LASSERTF((int)offsetof(struct obd_quotactl, qc_cmd) == 0, " found %lld\n",
+                 (long long)(int)offsetof(struct obd_quotactl, qc_cmd));
+        LASSERTF((int)sizeof(((struct obd_quotactl *)0)->qc_cmd) == 4, " found %lld\n",
+                 (long long)(int)sizeof(((struct obd_quotactl *)0)->qc_cmd));
+        LASSERTF((int)offsetof(struct obd_quotactl, qc_type) == 4, " found %lld\n",
+                 (long long)(int)offsetof(struct obd_quotactl, qc_type));
+        LASSERTF((int)sizeof(((struct obd_quotactl *)0)->qc_type) == 4, " found %lld\n",
+                 (long long)(int)sizeof(((struct obd_quotactl *)0)->qc_type));
+        LASSERTF((int)offsetof(struct obd_quotactl, qc_id) == 8, " found %lld\n",
+                 (long long)(int)offsetof(struct obd_quotactl, qc_id));
+        LASSERTF((int)sizeof(((struct obd_quotactl *)0)->qc_id) == 4, " found %lld\n",
+                 (long long)(int)sizeof(((struct obd_quotactl *)0)->qc_id));
+        LASSERTF((int)offsetof(struct obd_quotactl, qc_stat) == 12, " found %lld\n",
+                 (long long)(int)offsetof(struct obd_quotactl, qc_stat));
+        LASSERTF((int)sizeof(((struct obd_quotactl *)0)->qc_stat) == 4, " found %lld\n",
+                 (long long)(int)sizeof(((struct obd_quotactl *)0)->qc_stat));
+        LASSERTF((int)offsetof(struct obd_quotactl, qc_dqinfo) == 16, " found %lld\n",
+                 (long long)(int)offsetof(struct obd_quotactl, qc_dqinfo));
+        LASSERTF((int)sizeof(((struct obd_quotactl *)0)->qc_dqinfo) == 24, " found %lld\n",
+                 (long long)(int)sizeof(((struct obd_quotactl *)0)->qc_dqinfo));
+        LASSERTF((int)offsetof(struct obd_quotactl, qc_dqblk) == 40, " found %lld\n",
+                 (long long)(int)offsetof(struct obd_quotactl, qc_dqblk));
+        LASSERTF((int)sizeof(((struct obd_quotactl *)0)->qc_dqblk) == 72, " found %lld\n",
+                 (long long)(int)sizeof(((struct obd_quotactl *)0)->qc_dqblk));
+
+        /* Checks for struct obd_dqinfo */
+        LASSERTF((int)sizeof(struct obd_dqinfo) == 24, " found %lld\n",
+                 (long long)(int)sizeof(struct obd_dqinfo));
+        LASSERTF((int)offsetof(struct obd_dqinfo, dqi_bgrace) == 0, " found %lld\n",
+                 (long long)(int)offsetof(struct obd_dqinfo, dqi_bgrace));
+        LASSERTF((int)sizeof(((struct obd_dqinfo *)0)->dqi_bgrace) == 8, " found %lld\n",
+                 (long long)(int)sizeof(((struct obd_dqinfo *)0)->dqi_bgrace));
+        LASSERTF((int)offsetof(struct obd_dqinfo, dqi_igrace) == 8, " found %lld\n",
+                 (long long)(int)offsetof(struct obd_dqinfo, dqi_igrace));
+        LASSERTF((int)sizeof(((struct obd_dqinfo *)0)->dqi_igrace) == 8, " found %lld\n",
+                 (long long)(int)sizeof(((struct obd_dqinfo *)0)->dqi_igrace));
+        LASSERTF((int)offsetof(struct obd_dqinfo, dqi_flags) == 16, " found %lld\n",
+                 (long long)(int)offsetof(struct obd_dqinfo, dqi_flags));
+        LASSERTF((int)sizeof(((struct obd_dqinfo *)0)->dqi_flags) == 4, " found %lld\n",
+                 (long long)(int)sizeof(((struct obd_dqinfo *)0)->dqi_flags));
+        LASSERTF((int)offsetof(struct obd_dqinfo, dqi_valid) == 20, " found %lld\n",
+                 (long long)(int)offsetof(struct obd_dqinfo, dqi_valid));
+        LASSERTF((int)sizeof(((struct obd_dqinfo *)0)->dqi_valid) == 4, " found %lld\n",
+                 (long long)(int)sizeof(((struct obd_dqinfo *)0)->dqi_valid));
+
+        /* Checks for struct obd_dqblk */
+        LASSERTF((int)sizeof(struct obd_dqblk) == 72, " found %lld\n",
+                 (long long)(int)sizeof(struct obd_dqblk));
+        LASSERTF((int)offsetof(struct obd_dqblk, dqb_bhardlimit) == 0, " found %lld\n",
+                 (long long)(int)offsetof(struct obd_dqblk, dqb_bhardlimit));
+        LASSERTF((int)sizeof(((struct obd_dqblk *)0)->dqb_bhardlimit) == 8, " found %lld\n",
+                 (long long)(int)sizeof(((struct obd_dqblk *)0)->dqb_bhardlimit));
+        LASSERTF((int)offsetof(struct obd_dqblk, dqb_bsoftlimit) == 8, " found %lld\n",
+                 (long long)(int)offsetof(struct obd_dqblk, dqb_bsoftlimit));
+        LASSERTF((int)sizeof(((struct obd_dqblk *)0)->dqb_bsoftlimit) == 8, " found %lld\n",
+                 (long long)(int)sizeof(((struct obd_dqblk *)0)->dqb_bsoftlimit));
+        LASSERTF((int)offsetof(struct obd_dqblk, dqb_curspace) == 16, " found %lld\n",
+                 (long long)(int)offsetof(struct obd_dqblk, dqb_curspace));
+        LASSERTF((int)sizeof(((struct obd_dqblk *)0)->dqb_curspace) == 8, " found %lld\n",
+                 (long long)(int)sizeof(((struct obd_dqblk *)0)->dqb_curspace));
+        LASSERTF((int)offsetof(struct obd_dqblk, dqb_ihardlimit) == 24, " found %lld\n",
+                 (long long)(int)offsetof(struct obd_dqblk, dqb_ihardlimit));
+        LASSERTF((int)sizeof(((struct obd_dqblk *)0)->dqb_ihardlimit) == 8, " found %lld\n",
+                 (long long)(int)sizeof(((struct obd_dqblk *)0)->dqb_ihardlimit));
+        LASSERTF((int)offsetof(struct obd_dqblk, dqb_isoftlimit) == 32, " found %lld\n",
+                 (long long)(int)offsetof(struct obd_dqblk, dqb_isoftlimit));
+        LASSERTF((int)sizeof(((struct obd_dqblk *)0)->dqb_isoftlimit) == 8, " found %lld\n",
+                 (long long)(int)sizeof(((struct obd_dqblk *)0)->dqb_isoftlimit));
+        LASSERTF((int)offsetof(struct obd_dqblk, dqb_curinodes) == 40, " found %lld\n",
+                 (long long)(int)offsetof(struct obd_dqblk, dqb_curinodes));
+        LASSERTF((int)sizeof(((struct obd_dqblk *)0)->dqb_curinodes) == 8, " found %lld\n",
+                 (long long)(int)sizeof(((struct obd_dqblk *)0)->dqb_curinodes));
+        LASSERTF((int)offsetof(struct obd_dqblk, dqb_btime) == 48, " found %lld\n",
+                 (long long)(int)offsetof(struct obd_dqblk, dqb_btime));
+        LASSERTF((int)sizeof(((struct obd_dqblk *)0)->dqb_btime) == 8, " found %lld\n",
+                 (long long)(int)sizeof(((struct obd_dqblk *)0)->dqb_btime));
+        LASSERTF((int)offsetof(struct obd_dqblk, dqb_itime) == 56, " found %lld\n",
+                 (long long)(int)offsetof(struct obd_dqblk, dqb_itime));
+        LASSERTF((int)sizeof(((struct obd_dqblk *)0)->dqb_itime) == 8, " found %lld\n",
+                 (long long)(int)sizeof(((struct obd_dqblk *)0)->dqb_itime));
+        LASSERTF((int)offsetof(struct obd_dqblk, dqb_valid) == 64, " found %lld\n",
+                 (long long)(int)offsetof(struct obd_dqblk, dqb_valid));
+        LASSERTF((int)sizeof(((struct obd_dqblk *)0)->dqb_valid) == 4, " found %lld\n",
+                 (long long)(int)sizeof(((struct obd_dqblk *)0)->dqb_valid));
+
         /* Checks for struct niobuf_remote */
         LASSERTF((int)sizeof(struct niobuf_remote) == 16, " found %lld\n",
                  (long long)(int)sizeof(struct niobuf_remote));
@@ -1188,6 +1290,8 @@ void lustre_assert_wire_constants(void)
                  (long long)OST_RAID1_REC);
         LASSERTF(MDS_UNLINK_REC == 274801668, " found %lld\n",
                  (long long)MDS_UNLINK_REC);
+        LASSERTF(MDS_SETATTR_REC == 274801665, " found %lld\n",
+                 (long long)MDS_SETATTR_REC);
         LASSERTF(OBD_CFG_REC == 274857984, " found %lld\n",
                  (long long)OBD_CFG_REC);
         LASSERTF(PTL_CFG_REC == 274923520, " found %lld\n",
@@ -1484,5 +1588,25 @@ void lustre_assert_wire_constants(void)
                  (long long)(int)offsetof(struct llogd_conn_body, lgdc_ctxt_idx));
         LASSERTF((int)sizeof(((struct llogd_conn_body *)0)->lgdc_ctxt_idx) == 4, " found %lld\n",
                  (long long)(int)sizeof(((struct llogd_conn_body *)0)->lgdc_ctxt_idx));
+
+        /* Checks for struct qunit_data */
+        LASSERTF((int)sizeof(struct qunit_data) == 16, " found %lld\n",
+                 (long long)(int)sizeof(struct qunit_data));
+        LASSERTF((int)offsetof(struct qunit_data, qd_id) == 0, " found %lld\n",
+                 (long long)(int)offsetof(struct qunit_data, qd_id));
+        LASSERTF((int)sizeof(((struct qunit_data *)0)->qd_id) == 4, " found %lld\n",
+                 (long long)(int)sizeof(((struct qunit_data *)0)->qd_id));
+        LASSERTF((int)offsetof(struct qunit_data, qd_type) == 4, " found %lld\n",
+                 (long long)(int)offsetof(struct qunit_data, qd_type));
+        LASSERTF((int)sizeof(((struct qunit_data *)0)->qd_type) == 4, " found %lld\n",
+                 (long long)(int)sizeof(((struct qunit_data *)0)->qd_type));
+        LASSERTF((int)offsetof(struct qunit_data, qd_count) == 8, " found %lld\n",
+                 (long long)(int)offsetof(struct qunit_data, qd_count));
+        LASSERTF((int)sizeof(((struct qunit_data *)0)->qd_count) == 4, " found %lld\n",
+                 (long long)(int)sizeof(((struct qunit_data *)0)->qd_count));
+        LASSERTF((int)offsetof(struct qunit_data, qd_isblk) == 12, " found %lld\n",
+                 (long long)(int)offsetof(struct qunit_data, qd_isblk));
+        LASSERTF((int)sizeof(((struct qunit_data *)0)->qd_isblk) == 4, " found %lld\n",
+                 (long long)(int)sizeof(((struct qunit_data *)0)->qd_isblk));
 }