From: green Date: Fri, 1 Apr 2005 21:32:29 +0000 (+0000) Subject: Landing b1_4_quotaoff to b1_4 X-Git-Tag: v1_8_0_110~486^7~70 X-Git-Url: https://git.whamcloud.com/gitweb?a=commitdiff_plain;h=69c0f5e7839a36c32e63bc6c0c4b43095c2e57a5;p=fs%2Flustre-release.git Landing b1_4_quotaoff to b1_4 --- diff --git a/ldiskfs/ldiskfs/Makefile.in b/ldiskfs/ldiskfs/Makefile.in index e2aff01..be51da2 100644 --- a/ldiskfs/ldiskfs/Makefile.in +++ b/ldiskfs/ldiskfs/Makefile.in @@ -1,6 +1,6 @@ default: all -MODULES := ldiskfs +MODULES := ldiskfs quotafmt_test # copy makefile over to not break patches ext3_extra := $(wildcard @LINUX@/fs/ext3/Makefile) @@ -11,9 +11,13 @@ linux_headers := $(wildcard @LINUX@/include/linux/ext3*.h) ext3_sources := $(filter-out %.mod.c,$(wildcard @LINUX@/fs/ext3/*.c)) new_sources := iopen.c iopen.h extents.c mballoc.c new_headers := ext3_extents.h -ldiskfs_sources := $(notdir $(ext3_sources) $(ext3_headers)) $(new_sources) $(new_headers) +quotafmt_sources := lustre_quota_fmt.c +quotafmt_headers := lustre_quota_fmt.h +ldiskfs_patched_sources := $(notdir $(ext3_sources) $(ext3_headers)) $(new_sources) $(new_headers) +ldiskfs_sources := $(ldiskfs_patched_sources) $(quotafmt_sources) $(quotafmt_headers) ldiskfs-objs := $(filter %.o,$(ldiskfs_sources:.c=.o)) +quotafmt-objs := quotafmt_test.o EXTRA_PRE_CFLAGS := -I@LINUX@/fs -I@LUSTRE@ -I@LUSTRE@/ldiskfs diff --git a/ldiskfs/ldiskfs/autoMakefile.am b/ldiskfs/ldiskfs/autoMakefile.am index a31e01a..4f9e784 100644 --- a/ldiskfs/ldiskfs/autoMakefile.am +++ b/ldiskfs/ldiskfs/autoMakefile.am @@ -6,7 +6,7 @@ endif ldiskfs_linux_headers := $(addprefix linux/,$(subst ext3,ldiskfs,$(notdir $(linux_headers)))) -$(filter %.c,$(ldiskfs_sources)): sources $(ldiskfs_linux_headers) $(filter %.h,$(ldiskfs_sources)) +$(filter %.c,$(ldiskfs_patched_sources)): sources $(ldiskfs_linux_headers) $(filter %.h,$(ldiskfs_patched_sources)) ldiskfs_sed_flags = \ -e "s/dx_hash_info/ext3_dx_hash_info/g" \ @@ -71,7 +71,9 @@ foo-check: @echo 
"ldiskfs_LDADD: $(ldiskfs_LDADD)" MOSTLYCLEANFILES = *.o *.ko *.mod.c -CLEANFILES = sources *.c *.h +CLEANFILES = sources $(notdir $(linux_headers) $(ext3_headers) $(ext3_sources) $(new_sources) $(new_headers)) + +EXTRA_DIST := lustre_quota_fmt.c lustre_quota_fmt.h quotafmt_test.c clean: clean-am rm -rf linux linux-stage diff --git a/lustre/doc/quota_hld.lyx b/lustre/doc/quota_hld.lyx new file mode 100644 index 0000000..86d1096 --- /dev/null +++ b/lustre/doc/quota_hld.lyx @@ -0,0 +1,1231 @@ +#LyX 1.3 created this file. For more info see http://www.lyx.org/ +\lyxformat 221 +\textclass article +\language english +\inputencoding auto +\fontscheme times +\graphics default +\paperfontsize default +\spacing single +\papersize Default +\paperpackage a4 +\use_geometry 0 +\use_amsmath 0 +\use_natbib 0 +\use_numerical_citations 0 +\paperorientation portrait +\secnumdepth 3 +\tocdepth 3 +\paragraph_separation skip +\defskip medskip +\quotes_language english +\quotes_times 2 +\papercolumns 1 +\papersides 1 +\paperpagestyle default + +\layout Title + +Quota For Lustre +\layout Section + +From Engineering Requirements Specification +\layout Enumerate + +Lustre can operate and enforce disk block quota and file quota. +\layout Enumerate + +Hard and soft quota are supported +\layout Enumerate + +Central management tools enable setting limits for users and initializing + quota check operations +\layout Enumerate + +Quota are only needed for Linux 2.6 +\layout Section + +Specification of subsystems +\layout Description + +Definition: An +\emph on +operational quota file +\emph default + is a quota database containing limits for some uid's and gid's which is + being used to enforce quota. + An +\emph on +administrative quota file +\emph default + is a similar database, but it is used for recovery and soft quota or administra +tive purposes. 
+\layout Subsection + +Master & slaves +\layout Standard + +A node is a master +\series bold +for a uid or gid +\series default +if the node holds the cluster wide limits (hard, soft, files, blocks & gracetime +s) for that uid or gid in an administrative quota file. + The administrative quota file is similar to normal ext3 quota file. + The data structures and code for an administrative quota file API will + be copied from the Linux VFS to ldiskfs and amended. + Slave nodes (all other servers) only consider hard quota and only have + operational quota files. +\layout Standard + +Note that a node may be a master for some uid's, gid's and a slave for others. + Masters also have an operational quota file for enforcing hard quota. + Master +\series bold +observe soft limits in the administrative file, based on grace times +\series default +. +\layout Subsection + +Acquire / release protocol +\layout Standard + +The master administrative quota file has two kinds of limits: total limits + and limit acquired by all servers (administrative usage). + Total limits are set by user, administrative usage is initialized to zero + and it's amended when master/slaves acquire or release quota. +\layout Standard + +Quota slaves can acquire from the master and release to the master qunits + of disk space (>100MB typically, see ERS). + Slaves do this to increase / lower their hard limits of operational file. + Upon acquiring quota from a master the master's administrative usage is + increased. + Master can acquire/release qunits, just like slaves, except that it is + done locally. +\layout Standard + +On the master only, soft limits are enforced in obd layer based on the administr +ative quota file. + Once administrative usage >= administrative soft limit, the timer is activated. +\layout Subsection + +Chown Operations +\layout Standard + +All objects associated with a file will have their owners set to that of + the MDS inode.
+ These chown operations occur in connection with file creation and chowning + on the MDS and are asynchronous. + There will also be enough space in the records to set an EA on the objects + indicating the originating MDS, fileset and storage id of the inode. + The arguments will contain the following - but the final format of the + packet sent is subject to approval by management (it may be larger): +\layout LyX-Code + +struct object_setattr_args { +\layout LyX-Code + + __u64 osa_mds_id; /* to identify MDS */ +\layout LyX-Code + + __u64 osa_fileset_id; /* part of the fid, tbd */ +\layout LyX-Code + + __u64 osa_ino; /* inode number on mds */ +\layout LyX-Code + + __u64 osa_gen; /* inode generation on mds */ +\layout LyX-Code + + __u32 osa_uid; /* owner of the file */ +\layout LyX-Code + + __u32 osa_gid; /* group of the file */ +\layout LyX-Code + + __u64 osa_mds_transno;/* for recovery of mds rollback */ +\layout LyX-Code + + __u64 osa_mds_last_committed; +\layout LyX-Code + + __u32 osa_mds_prev_uid; /* to undo things that didn't complete on + the MDS */ +\layout LyX-Code + + __u32 osa_mds_prev_gid; +\layout LyX-Code + +} +\layout Subsection + +Recovery +\layout Standard + +A recovery protocol for limits involves +\layout Description + +Master\SpecialChar ~ +recovery re-writing the operational limits on the master node, based + on the cluster-wide limits as found in the administrative quota file +\layout Description + +Slave\SpecialChar ~ +recovery completing aborted release operations on slaves. + +\layout Standard + +Chown operations for objects will use llog recovery on the MDS (as it is + used for unlinks). + +\layout Standard + +MDS chown operations that are lost are not recovered at this point - but + arguments to do so in the future are passed as above. + The recovery from this is fairly simple: the OST writes log operations + for each chown operation containing the MDS transaction number and undo + information. 
+ The MDS reports last committed transactions to the OST. + During normal use these lead to cancellations of records leading up to + that transaction. + During recovery, all llog records following the record containing the transacti +on number will be used to undo the OST chown/chgrp operations. +\layout Standard + +For new files, removal of objects does already take place. +\layout Subsection + +Configuration +\layout Standard + +A configuration protocol will initiate quota check operations, turn quota + on, and set limits. + All commands will be issued through lfs. +\layout Subsection + +Disk fs handling +\layout Standard + +Disk file systems track quota usage. + An interface between OSS and MDS and disk file systems will enable a check + and adjustment of disk file system quota limits before operations proceed. + Every node will try to acquire quota before proceeding. + Every node will release quota after finishing. + Acquire and release calls are tuned to anticipate use. + Disk fs quota check handling will be possibly on busy file systems. +\layout Section + +Use cases +\layout Standard + +Each use case is an interaction between a +\begin_inset Quotes eld +\end_inset + +user +\begin_inset Quotes erd +\end_inset + + and +\begin_inset Quotes eld +\end_inset + +system +\begin_inset Quotes erd +\end_inset + +. + For each use case we describe what subsystem forms the +\begin_inset Quotes eld +\end_inset + +user +\begin_inset Quotes erd +\end_inset + + and the +\begin_inset Quotes eld +\end_inset + +system +\begin_inset Quotes erd +\end_inset + +. + Use the logical components indicated in sections 3.1-3.4 below to describe + the use cases. + The purpose is to check that each of the use cases at a high level appears + to execute successfully by using the components listed under 3.1-3.4. + In some of the scenarios (e.g. + 3.2 multiple use scenarios should be described, e.g. + how is the slave-master protocol involved and how is the client - oss protocol + involved). 
+ +\layout Subsection + +Initialization operation +\layout Subsubsection + +Changing owners +\layout Standard + +The following operations are done on a client: +\layout List +\labelwidthstring 00.00.0000 + +Administrator get root priviliges on the file system +\layout List +\labelwidthstring 00.00.0000 + +Administrator run `find -type f | xargs lchog` +\begin_deeper +\layout Enumerate + + is mount point +\layout Enumerate + + +\emph on +lchog +\emph default + is a small utility to do chown/chgrp, its usage: +\begin_deeper +\layout Standard + + +\emph on +lchog [-i] FILE... + +\emph default + +\layout Description + + +\emph on +-i +\emph default + ignore ENOENT error +\end_deeper +\end_deeper +\layout List +\labelwidthstring 00.00.0000 + +System +\emph on +lchog +\emph default + will abort if change failed, and then report error, indicating what was + searched etc. + Generally user cannot ignore the error, and should fix it and redo the + above before the next operation, except that user can set +\emph on +-i +\emph default + option for +\emph on +lchog +\emph default + to ignore ENOENT error. + +\layout Subsubsection + +Mounting existing file systems with quota support +\layout List +\labelwidthstring 00.00.0000 + +Administrator file systems on all server nodes should be mounted with quota + support, this can be done by running +\emph on + lconf +\emph default + on all nodes: +\emph on +lconf --mountfsoptions quota ..., +\emph default + if the file system has already been mounted, it should be umounted first. +\layout List +\labelwidthstring 00.00.0000 + +System all needed modules are loaded, and file systems are mounted with + quota support. +\layout List +\labelwidthstring 00.00.0000 + +Administrator run `lfs quotacheck`, it will initiate quota check on all + MDS' and OSTs one by one. + +\layout List +\labelwidthstring 00.00.0000 + +System on each node ``quotacheck'' will walk through the diskfs. 
+ When the check finishes, it will report the check status to the initiator. + If it failed, the error is listed. + +\layout List +\labelwidthstring 00.00.0000 + +Administrator user should fix the errors and recheck the specified nodes + before proceeding to the next step. + +\layout List +\labelwidthstring 00.00.0000 + +Administrator run `lfs quotaon`, it will initiate quotaon on all MDS' and + OSTs one by one. +\layout List +\labelwidthstring 00.00.0000 + +System each node will start to check/handle quota. + The status will be reported back to the initiator. +\layout List +\labelwidthstring 00.00.0000 + +Administrator user should fix the errors if there are any. +\layout List +\labelwidthstring 00.00.0000 + +Administrator run `lfs setquota`, it will set limits on the corresponding + MDS master for the specified uid/gid. +\layout List +\labelwidthstring 00.00.0000 + +System if it's the first time to set limits, master will initialize quota + on all slaves, otherwise only modify the quota of itself. + Moreover, the limit info is saved in recovery quota file on master. + +\series bold + +\series default +The status will be reported to initiator. +\layout List +\labelwidthstring 00.00.0000 + +Administrator if some nodes failed, generally user should not ignore the + errors. +\layout Subsubsection + +a new file system to a state where it is using quota +\layout Standard + +Like above, but only need three steps: `lfs quotacheck`, `lfs quotaon` and + `lfs setquota`. +\layout Subsection + +Normal use block quota +\layout Standard + +Demonstrate how quota are acquired and released during normal use through + sequences of the API's and network calls defined in this document.
+\layout Standard + + +\series bold +DESCRIBE CASES WHERE +\layout Enumerate + +A USER DOES THIS OR THAT: WHAT are the system responses +\layout Enumerate + +The client does this or that: what are the OSS & MDS responses +\layout Enumerate + +The OST does this or that, what are the obdfilter / diskfs responses +\layout Subsubsection + +Acquire quota +\layout List +\labelwidthstring 00.00.0000 + +User issues file write operation. +\layout List +\labelwidthstring 00.00.0000 + +System performs write successfully and returns the written bytes. +\newline + +\layout List +\labelwidthstring 00.00.0000 + +Client makes IO requests to OSS. + +\layout List +\labelwidthstring 00.00.0000 + +OSS acquires qunit if needed. +\layout List +\labelwidthstring 00.00.0000 + +Master increase usage in administrative file then reply to OSS with granted + qunit. +\layout List +\labelwidthstring 00.00.0000 + +OSS updates local operational quota file, performs write operation and replies + client the ~noquota flag. +\newline + +\layout List +\labelwidthstring 00.00.0000 + +OST calls obd_commitrw to commit write. +\layout List +\labelwidthstring 00.00.0000 + +Obdfilter if not enough qunit, acquire qunit by dqacq rpc from master, updates + local operational quota file after dqacq reply, then performs normal direct + write. + +\layout Subsubsection + + +\begin_inset LatexCommand \label{release-quota} + +\end_inset + +Release quota +\layout List +\labelwidthstring 00.00.0000 + +User issues truncate or unlink operation. +\layout List +\labelwidthstring 00.00.0000 + +System performs the truncate/unlink operation and returns error code. +\newline + +\layout List +\labelwidthstring 00.00.0000 + +Client makes OST_PUNCH or OST_DESTROY requests to OSS. +\layout List +\labelwidthstring 00.00.0000 + +OSS performs truncate/unlink on objects. + release qunit to Master if needed. +\layout List +\labelwidthstring 00.00.0000 + +Master decrease usage in administrative file and reply to OSS.
+\layout List +\labelwidthstring 00.00.0000 + +OSS updates local operational quota file. +\newline + +\layout List +\labelwidthstring 00.00.0000 + +OST calls obd_destroy/obd_punch. +\layout List +\labelwidthstring 00.00.0000 + +Obdfilter performs unlink/truncate on objects, if there is qunit to be released, + release qunit by dqrel rpc to master then updates local operational quota + file. + +\layout Subsection + +Running out of block quota +\layout List +\labelwidthstring 00.00.0000 + +User issues file write operation. +\layout List +\labelwidthstring 00.00.0000 + +System write fails and return EDQUOT. + (but the pages in cache will be written successfully) +\newline + +\layout List +\labelwidthstring 00.00.0000 + +Client makes IO requests to OSS. +\layout List +\labelwidthstring 00.00.0000 + +OSS acquires qunit from master. +\layout List +\labelwidthstring 00.00.0000 + +Master reply noquota to OSS. +\layout List +\labelwidthstring 00.00.0000 + +OSS fs write fails, rewrites pages from client cache forcibly, replies client + the noquota flag and error code. +\newline + +\layout List +\labelwidthstring 00.00.0000 + +OST calls obd_commitrw to commit write. +\layout List +\labelwidthstring 00.00.0000 + +Obdfilter acquiring qunit fails, then performs normal direct write and fails, + and then rewrites the pages from client cache, returns error code and noquota + flag to OST. + +\layout Subsection + +Freeing space to get under quota +\layout Standard + +The release steps are the same as those in +\begin_inset LatexCommand \ref{release-quota} + +\end_inset + +3.2.2. +\layout List +\labelwidthstring 00.00.0000 + +User issues file write operation. +\layout List +\labelwidthstring 00.00.0000 + +Client makes synchronous write rpc to OSS if there is noquota flag. +\layout List +\labelwidthstring 00.00.0000 + +OSS performs fs write successfully, return client ~noquota flag. +\layout List +\labelwidthstring 00.00.0000 + +Client clears noquota flag for this uid/gid. 
+ +\layout Subsection + +Enforcing soft quota +\layout Subsubsection + +Start soft quota timer +\layout List +\labelwidthstring 00.00.0000 + +User issues file write/create operations. + +\layout List +\labelwidthstring 00.00.0000 + +System returns successfully. +\newline + +\layout List +\labelwidthstring 00.00.0000 + +Client makes file write/create requests to OSS/MDS. +\layout List +\labelwidthstring 00.00.0000 + +OSS/MDS sends dqacq rpcs to get more quota from master. +\layout List +\labelwidthstring 00.00.0000 + +Master starts the timer once administrative usage >= administrative soft + limit and grants qunit to OSS/MDS. +\layout List +\labelwidthstring 00.00.0000 + +OSS/MDS write/create succeeds. +\layout Subsubsection + +Soft quota timer goes off +\layout List +\labelwidthstring 00.00.0000 + +User issues file write/create operations. + +\layout List +\labelwidthstring 00.00.0000 + +System returns EDQUOT. + +\newline + +\layout List +\labelwidthstring 00.00.0000 + +Client makes file write/create requests to OSS/MDS. +\layout List +\labelwidthstring 00.00.0000 + +OSS/MDS sends dqacq rpcs to get more quota from master. +\layout List +\labelwidthstring 00.00.0000 + +Master returns noquota to OSS/MDS. +\layout List +\labelwidthstring 00.00.0000 + +OSS/MDS write/create fails and returns error code to Client. +\layout Subsubsection + +Stop soft quota timer +\layout Standard + +The release steps are the same as those in +\begin_inset LatexCommand \ref{release-quota} + +\end_inset + +3.2.2. + +\layout List +\labelwidthstring 00.00.0000 + +Slave calls dqrel rpc to release extra quota. +\layout List +\labelwidthstring 00.00.0000 + +Master stops the timer once administrative usage < administrative soft limit. + +\layout Subsection + +File quota on the MDS +\layout Standard + +For CMD, it is similar to block quota described above. + For b1_4, it is completely managed by MDS locally.
+ +\layout Subsection + +Listing quota +\layout List +\labelwidthstring 00.00.0000 + +User runs 'lfs quota', it will make an rpc to the corresponding MDS master + for the specified uid/gid. +\layout List +\labelwidthstring 00.00.0000 + +System displays usage & limits related to quota for the uid/gid on all nodes + in the cluster. + if some nodes failed, reports the error to user. + +\layout List +\labelwidthstring 00.00.0000 + +User generally can ignore the errors. + +\layout Subsection + +Recovery of quota +\layout Standard + + +\series bold +just describe interaction initiator - response, no internals +\layout Subsubsection + +Slave recovery +\layout List +\labelwidthstring 00.00.0000 + +Slave releases unreasonably high limits to master. +\layout List +\labelwidthstring 00.00.0000 + +Master updates administrative quota file and reply to slave. +\layout List +\labelwidthstring 00.00.0000 + +Slave updates local operational quota file. + +\layout Subsubsection + +Master recovery +\layout List +\labelwidthstring 00.00.0000 + +Master enquires all slaves' operational limits by issuing a new RPC. + +\series bold + +\layout List +\labelwidthstring 00.00.0000 + +Slave replies with limit. +\layout List +\labelwidthstring 00.00.0000 + +Master updates administrative quota file. + +\layout Section + +State considerations +\layout Subsection + +Node state +\layout Subsection + +Context state +\layout Section + +Logic specification +\layout Standard + +The quota implementation falls into a few, almost separate, components.
+\layout Standard + + +\series bold +ORDER OF IMPLEMENTATION +\layout Enumerate + +Administrative utilities, with sufficient flexibility to create unit test + cases +\layout Enumerate + +Administrative quota file implementation +\layout Enumerate + +OSS enforcement of quota (can be tested separately) +\layout Enumerate + +client - OSS protocol +\layout Enumerate + +quota context +\layout Enumerate + +quota acquire release protocol +\layout Enumerate + +MDS-OST setattr calls +\layout Enumerate + +comprehensive testing of use cases +\layout Enumerate + +recovery protocol +\layout Enumerate + +soft limit +\layout Subsection + +Administrative utilities +\layout Standard + +For all of the following commands it is probably useful to define a single + datastructure that has enough fields to hold all the data that needs to + be transfered. +\layout Description + +Top\SpecialChar ~ +priority +\layout Enumerate + +All utilities are either: +\begin_deeper +\layout Enumerate + +file system ioctls - where non-standard Lustre specific info is needed (e.g. + listing) +\layout Enumerate + +standard quotactl interfaces +\end_deeper +\layout Enumerate + +A lustre obd_iocontrol will allow an MDS to initiate quota check or quotaon + operations on all OST's. + It should be possible to issue this ioctl as a file system ioctl on a client, + or giving an MDS device on an MDS. + +\series bold +NOTE: +\series default +This rpc can be the same as the master to slave recovery enquiry rpc defined + below. +\layout Enumerate + +an obd_iocontrol and special lfs is needed to display usage & limits related + to quota for a uid/gid on all nodes in the cluster. + This needs to be added to lfs and need to be a command that can be issued + from a file system client. + +\layout Enumerate + +a command is needed to set the limits for a uid/gid, perhaps based on a + template. + The limits need to be set on the master and in the limit database. 
+ All slaves need to be notified that quota tracking for the uid/gid is now + in effect (perhaps by increasing quota limits on the node to a non-zero + value). + Similarly it should be possible to disable quota for a uid / gid. +\layout Enumerate + +Documentation for all of these will be implemented as manual page extensions + and as part of the Lustre Users Guide. +\layout Enumerate + +A chown.chgrp utility. + Build a small c utility that stats a file and then issues the chown/chgrp + system call to change the owner/group on the file. + This is issued from a client, in conjunction with running a find command + to initialize ownership. + This can only be run after the MDS has been changed to incorporate part + 3.3 +\layout Subsection + +Administrative quota file & disk file system quota +\layout Enumerate + +The administrative quota file will be a quota file similar to ext3 based + quota files with the usual VFS determined tree format. + +\layout Enumerate + +The VFS quota api will be adapted to enable the administrative commands + to create quota files by name and operate on them without sb (super block) + or dquot quota context arguments as required. +\layout Enumerate + + +\series bold +(Design this, but implementation is second priority) +\series default +Quota check will be adapted to handle checking on a live file system, as + follows: +\begin_deeper +\layout Enumerate + +if inodes are not checked in sequence order (1,2,3, etc) the following is + probably not possible. +\layout Enumerate + +block all operations on an inode while it is being +\begin_inset Quotes eld +\end_inset + +checked +\begin_inset Quotes erd +\end_inset + +.
+ +\layout Enumerate + +account for quota on inodes that are already checked +\layout Enumerate + +do not account on inodes that are not yet checked +\end_deeper +\layout Subsection + +OSS enforcement +\layout Enumerate + +The direct I/O and truncate calls on the OSS will enforce quota +\layout Subsection + +Client OST/MDT protocol +\layout Standard + +The following component can initially be implemented based on quota status + codes returned by the disk file system. + In due course the status of quota will be determined by the acquire calls + made in the OST or obdfilter. +\layout Enumerate + +All writes functions executed on OST's track quota for newly allocated space. +\layout Enumerate + +If a client flushes a page cache to an OST the data will be written (even + if quota are exceeded). + The mount flags allowing root to squash quota should be used for this. +\layout Enumerate + +If a client exceeds quota, a return code will indicate that + further writes for files owned by that uid/gid must now be done synchronously. +\layout Enumerate + +If quota limits on the OSS are sufficient again, through removal of files + or enlarging limits, the flag must be cleared. +\layout Enumerate + +For MDC file quota are currently handled synchronously on the server. + +\layout Subsection + +Quota context and server quota enforcement +\layout Enumerate + +The MDS will automatically track block quota associated with directories. + It is important the llog files are owned by root users and not subject + to quota +\layout Enumerate + +For root-owned files, Lustre quota should not be enabled (there are + too many administratively controlled root-owned files right now). +\layout Enumerate + +There will be an active +\series bold +quota context +\series default + for a uid or gid for which quota operations are in progress.
+ Processes acquiring quota will find the context for that user or group + and wait on the context intelligently and not all fire RPC's to the master. + The context should also intelligently handle recovery operations running + concurrently with normal quota use. +\layout Subsection + +Slave to Master acquire / release protocol +\layout Enumerate + +Tunables +\begin_deeper +\layout Enumerate + +All servers will have tunables for qunits and early acquisition of more + qunits. + +\layout Enumerate + +The tunables can be set to configurable values through lconf, one set of + values for slave behavior, one for master behavior each separated for OSS + nodes, one for MDS nodes, as part of the configuration zeroconfig llog. + +\layout Enumerate + +The tunables can also be adjusted dynamically in /proc. + +\layout Enumerate + +Adjusting through proc only is not acceptable. +\end_deeper +\layout Enumerate + +There will be a function that determines the master node for a given uid + or gid. + For the 1.4 branch this function is always returning the MDS, but it will + be designed to make it easy to adapt to clustered metadata. +\layout Enumerate + +There will be dqacq and dqrel rpc's initiated by slave nodes. + The code will be organized so that it can be run on slave OSS and slave + MDS nodes without modification. + These functions will increase / decrease the local limits and administrative + usage on master. +\layout Enumerate + +A unit test program will run a collection of not less than 3 slaves and + a master through a sequence of interesting acquisitions and releases. +\layout Subsection + +Full integration and system testing +\layout Enumerate + +Full unit tests for all components. +\layout Enumerate + +Demonstrate successful handling of recovery from exceeding soft and hard + limits.
+\layout Subsection + +MDS - OST setattr calls +\layout Enumerate + +When the MDS creates or chowns a file it will queue an asynchronous obd_setattr + rpc to the RPC that: +\begin_deeper +\layout Enumerate + +changes the owner/group of the objects for the file. +\layout Enumerate + +transfers the storage id (ask Yury for data type) to the OSS (this is in + the create case only). + It writes the storage id in an EA. +\end_deeper +\layout Enumerate + +The obd_setattr calls will be journaled almost exactly like mds_unlink calls + in an llog (except that for unlink presently the client unlinks the objects) + and records will be canceled when the setattr commands commit to disk on + the OST. + +\layout Enumerate + +The obd_setattr rpc's will be queued on an RPC set for asynchronous completion, + i.e. + the MDS will reply to the client without waiting for the result. + The simple strategy ( +\begin_inset Quotes eld +\end_inset + +chown, even if user goes over quota +\begin_inset Quotes erd +\end_inset + +, see ERS) will be followed. +\layout Enumerate + +For this part not more than 4 (four) lines of code may be added to mds_open. + Adding 0 lines to this function (the longest in Lustre) would be better. +\layout Enumerate + +Demonstrate handling recovery of 300,000 orphaned chown operations while + the cluster is in use already. +\layout Subsection + +Server Node Recovery +\layout Standard + +Note: in CMD nodes will be slaves for some uids and masters for others. + The algorithm outlined here handles the general case. +\layout Enumerate + +Nodes will recover quota asynchronously, i.e. + they will start normal operations, without waiting for quota recovery to + complete.
+\layout Enumerate + + +\series bold +Slave recovery initiation: +\begin_deeper +\layout Enumerate + +Slave recovery is initiated on a per-connection basis +\begin_deeper +\layout Enumerate + +Upon obtaining a new connection to a server node that can be a master during + normal operations +\layout Enumerate + +Upon entering normal operations while connections are present +\end_deeper +\layout Enumerate + +The recovery is aborted if a connection fails. +\layout Enumerate + +A collection of threads is needed to handle this recovery +\layout Enumerate + +The quota file handling should be sufficiently concurrent that multiple + connections can recover in parallel +\end_deeper +\layout Enumerate + + +\series bold +Slave recovery: +\series default + +\begin_deeper +\layout Enumerate + +During normal use the node will iterate through all the users and groups + in the operational quota file. + +\layout Enumerate + +If the connection is not one to the master for this uid/gid go to the next + uid/gid. +\layout Enumerate + +If such a uid/gid is also found in the node's administrative quota file, + this node is the master for that id and this id will be skipped, else continue +\series bold +. +\layout Enumerate + +Release unreasonably high limits for this uid/gid. +\layout Enumerate + +The contexts used for updating quota from the filter should be designed so + that these releases can be made concurrent with normal use.
+\end_deeper +\layout Enumerate + + +\series bold +Master recovery initiation +\begin_deeper +\layout Enumerate + +Master recovery requires connections to all other servers, it is initiated: +\begin_deeper +\layout Enumerate + +If upon entering normal operations all connections are present +\layout Enumerate + +If during normal operation all connections reach a usable state +\end_deeper +\layout Enumerate + +It is aborted if any connection fails during master recovery +\end_deeper +\layout Enumerate + + +\series bold +Master recovery: +\begin_deeper +\layout Enumerate + +During normal use the master will iterate through the administrative quota + file. +\layout Enumerate + +It will lock quota operations on the master for that uid. +\layout Enumerate + +For each uid/gid found it will make +\series bold +a new quota related master to slave +\series default +RPC to all other servers and ask for the current limit (and usage). +\layout Enumerate + +If a response is obtained from all nodes, the operational limit on the master + node is updated so that the sum of all operational limits is the clusterwide + administrative limit. + +\layout Enumerate + +If a response is not obtained from all servers, abort. +\end_deeper +\layout Subsection + +Soft Limits +\layout Standard + +Soft quota is not enforced in fs layer on master or slave. + It's only enforced in obd layer on Master: +\layout Enumerate + +The grace time and soft start time will be kept in administrative file. +\layout Enumerate + +Master monitors the administrative usage on each qunit acquire/release handling: + log the soft start time once the administrative usage >= administrative + soft limit, clear the soft start time once the administrative usage < administr +ative soft limit. +\layout Enumerate + +Master will reject any qunit acquire request if soft start time + grace + time < current time. +\layout Standard + +Make sure we have unit tests and integration and system tests that verify + this comprehensively.
+\layout Section + +Changelog +\layout Description + +2005/01/29 First draft. + Based on review of Zhaohongs writings and ERS. +\layout Description + +2005/02/06 Second draft, much more detail to aid the team +\the_end diff --git a/lustre/include/liblustre.h b/lustre/include/liblustre.h index de915f8..d6f0cd9 100644 --- a/lustre/include/liblustre.h +++ b/lustre/include/liblustre.h @@ -724,5 +724,4 @@ int liblustre_wait_event(int timeout); #include #include - #endif diff --git a/lustre/include/linux/Makefile.am b/lustre/include/linux/Makefile.am index 4018e67..f0d145f 100644 --- a/lustre/include/linux/Makefile.am +++ b/lustre/include/linux/Makefile.am @@ -14,4 +14,5 @@ EXTRA_DIST = lprocfs_status.h lustre_debug.h lustre_ha.h lustre_lib.h \ lustre_net.h obd_class.h obd_ost.h obd_support.h lustre_commit_confd.h \ lustre_export.h lustre_log.h obd_echo.h \ lustre_compat25.h lustre_fsfilt.h lustre_import.h lustre_mds.h obd.h \ - lvfs.h lvfs_linux.h lustre_cfg.h lustre_lite.h lustre_idl.h + lvfs.h lvfs_linux.h lustre_cfg.h lustre_lite.h lustre_idl.h \ + lustre_quota.h diff --git a/lustre/include/linux/lustre_fsfilt.h b/lustre/include/linux/lustre_fsfilt.h index 86774a9..ceb3a41 100644 --- a/lustre/include/linux/lustre_fsfilt.h +++ b/lustre/include/linux/lustre_fsfilt.h @@ -78,6 +78,13 @@ struct fsfilt_operations { int (* fs_read_record)(struct file *, void *, int size, loff_t *); int (* fs_setup)(struct super_block *sb); int (* fs_get_op_len)(int, struct fsfilt_objinfo *, int); + int (* fs_quotacheck)(struct super_block *sb, + struct obd_quotactl *oqctl); + int (* fs_quotactl)(struct super_block *sb, + struct obd_quotactl *oqctl); + int (* fs_quotainfo)(struct lustre_quota_info *lqi, int type, + int cmd); + int (* fs_dquot)(struct lustre_dquot *dquot, int cmd); }; extern int fsfilt_register_ops(struct fsfilt_operations *fs_ops); @@ -290,6 +297,33 @@ static inline int fsfilt_sync(struct obd_device *obd, struct super_block *sb) return obd->obd_fsops->fs_sync(sb); } +static 
inline int fsfilt_quotacheck(struct obd_device *obd, + struct super_block *sb, + struct obd_quotactl *oqctl) +{ + return obd->obd_fsops->fs_quotacheck(sb, oqctl); +} + +static inline int fsfilt_quotactl(struct obd_device *obd, + struct super_block *sb, + struct obd_quotactl *oqctl) +{ + return obd->obd_fsops->fs_quotactl(sb, oqctl); +} + +static inline int fsfilt_quotainfo(struct obd_device *obd, + struct lustre_quota_info *lqi, + int type, int cmd) +{ + return obd->obd_fsops->fs_quotainfo(lqi, type, cmd); +} + +static inline int fsfilt_dquot(struct obd_device *obd, + struct lustre_dquot *dquot, int cmd) +{ + return obd->obd_fsops->fs_dquot(dquot, cmd); +} + static inline int fsfilt_map_inode_pages(struct obd_device *obd, struct inode *inode, struct page **page, int pages, diff --git a/lustre/include/linux/lustre_idl.h b/lustre/include/linux/lustre_idl.h index a5d4405..7a7688a 100644 --- a/lustre/include/linux/lustre_idl.h +++ b/lustre/include/linux/lustre_idl.h @@ -234,6 +234,8 @@ typedef enum { OST_SAN_WRITE = 15, OST_SYNC = 16, OST_SET_INFO = 17, + OST_QUOTACHECK = 18, + OST_QUOTACTL = 19, OST_LAST_OPC } ost_cmd_t; #define OST_FIRST_OPC OST_REPLY @@ -251,9 +253,11 @@ typedef uint32_t obd_gid; typedef uint32_t obd_flag; typedef uint32_t obd_count; -#define OBD_FL_DELORPHAN (0x00000004) /* if set in o_flags delete orphans */ -#define OBD_FL_RECREATE_OBJS (0x00000020) // recreate missing obj -#define OBD_FL_DEBUG_CHECK (0x00000040) /* echo client/server debug check */ +#define OBD_FL_DELORPHAN (0x00000004) /* if set in o_flags delete orphans */ +#define OBD_FL_RECREATE_OBJS (0x00000020) /* recreate missing obj */ +#define OBD_FL_DEBUG_CHECK (0x00000040) /* echo client/server debug check */ +#define OBD_FL_NO_USRQUOTA (0x00000100) /* the object's owner is over quota */ +#define OBD_FL_NO_GRPQUOTA (0x00000200) /* the object's group is over quota */ #define OBD_INLINESZ 64 @@ -342,9 +346,12 @@ struct lov_mds_md_v1 { /* LOV EA mds/wire data (little-endian) */ #define 
OBD_MD_FLEPOCH (0x04000000) /* ->ost write easize is epoch */ #define OBD_MD_FLGRANT (0x08000000) /* ost preallocation space grant */ #define OBD_MD_FLDIREA (0x10000000) /* dir's extended attribute data */ +#define OBD_MD_FLUSRQUOTA (0x20000000) +#define OBD_MD_FLGRPQUOTA (0x40000000) /* over quota flags sent back by ost */ #define OBD_MD_FLNOTOBD (~(OBD_MD_FLBLOCKS | OBD_MD_LINKNAME|\ OBD_MD_FLEASIZE | OBD_MD_FLHANDLE | OBD_MD_FLCKSUM|\ - OBD_MD_FLQOS | OBD_MD_FLOSCOPQ | OBD_MD_FLCOOKIE)) + OBD_MD_FLQOS | OBD_MD_FLOSCOPQ | OBD_MD_FLCOOKIE|\ + OBD_MD_FLUSRQUOTA | OBD_MD_FLGRPQUOTA)) static inline struct lustre_handle *obdo_handle(struct obdo *oa) @@ -378,14 +385,14 @@ extern void lustre_swab_obd_statfs (struct obd_statfs *os); /* ost_body.data values for OST_BRW */ -#define OBD_BRW_READ 0x01 -#define OBD_BRW_WRITE 0x02 -#define OBD_BRW_RWMASK (OBD_BRW_READ | OBD_BRW_WRITE) -#define OBD_BRW_SYNC 0x08 -#define OBD_BRW_CHECK 0x10 -#define OBD_BRW_FROM_GRANT 0x20 /* the osc manages this under llite */ -#define OBD_BRW_GRANTED 0x40 /* the ost manages this */ -#define OBD_BRW_DROP 0x80 /* drop the page after IO */ +#define OBD_BRW_READ 0x01 +#define OBD_BRW_WRITE 0x02 +#define OBD_BRW_RWMASK (OBD_BRW_READ | OBD_BRW_WRITE) +#define OBD_BRW_SYNC 0x08 +#define OBD_BRW_CHECK 0x10 +#define OBD_BRW_FROM_GRANT 0x20 /* the osc manages this under llite */ +#define OBD_BRW_GRANTED 0x40 /* the ost manages this */ +#define OBD_BRW_DROP 0x80 /* drop the page after IO */ #define OBD_OBJECT_EOF 0xffffffffffffffffULL @@ -453,6 +460,8 @@ typedef enum { MDS_SYNC = 44, MDS_DONE_WRITING = 45, MDS_SET_INFO = 46, + MDS_QUOTACHECK = 47, + MDS_QUOTACTL = 48, MDS_LAST_OPC } mds_cmd_t; @@ -528,6 +537,49 @@ struct mds_body { extern void lustre_swab_mds_body (struct mds_body *b); +/* XXX: same as if_dqinfo struct in kernel */ +struct obd_dqinfo { + __u64 dqi_bgrace; + __u64 dqi_igrace; + __u32 dqi_flags; + __u32 dqi_valid; +}; + +/* XXX: same as if_dqblk struct in kernel, plus one padding */ 
+struct obd_dqblk { + __u64 dqb_bhardlimit; + __u64 dqb_bsoftlimit; + __u64 dqb_curspace; + __u64 dqb_ihardlimit; + __u64 dqb_isoftlimit; + __u64 dqb_curinodes; + __u64 dqb_btime; + __u64 dqb_itime; + __u32 dqb_valid; + __u32 padding; +}; + +#define Q_QUOTACHECK 0x800100 +#define Q_INITQUOTA 0x800101 /* init slave limits */ +#define Q_GETOINFO 0x800102 /* get obd quota info */ +#define Q_GETOQUOTA 0x800103 /* get obd quotas */ + +#define Q_TYPESET(oqc, type) \ + ((oqc)->qc_type == type || (oqc)->qc_type == UGQUOTA) + +#define Q_GETOCMD(oqc) \ + ((oqc)->qc_cmd == Q_GETOINFO || (oqc)->qc_cmd == Q_GETOQUOTA) + +struct obd_quotactl { + __u32 qc_cmd; + __u32 qc_type; + __u32 qc_id; + __u32 qc_stat; + struct obd_dqinfo qc_dqinfo; + struct obd_dqblk qc_dqblk; +}; + +extern void lustre_swab_obd_quotactl(struct obd_quotactl *q); struct mds_rec_setattr { __u32 sa_opcode; @@ -826,6 +878,7 @@ typedef enum { typedef enum { OBD_PING = 400, OBD_LOG_CANCEL, + OBD_QC_CALLBACK, OBD_LAST_OPC } obd_cmd_t; #define OBD_FIRST_OPC OBD_PING @@ -857,6 +910,7 @@ typedef enum { OST_SZ_REC = LLOG_OP_MAGIC | (OST_SAN_WRITE << 8), OST_RAID1_REC = LLOG_OP_MAGIC | ((OST_SAN_WRITE + 1) << 8), MDS_UNLINK_REC = LLOG_OP_MAGIC | 0x10000 | (MDS_REINT << 8) | REINT_UNLINK, + MDS_SETATTR_REC = LLOG_OP_MAGIC | 0x10000 | (MDS_REINT << 8) | REINT_SETATTR, OBD_CFG_REC = LLOG_OP_MAGIC | 0x20000, PTL_CFG_REC = LLOG_OP_MAGIC | 0x30000, LLOG_GEN_REC = LLOG_OP_MAGIC | 0x40000, @@ -921,6 +975,16 @@ struct llog_unlink_rec { struct llog_rec_tail lur_tail; } __attribute__((packed)); +struct llog_setattr_rec { + struct llog_rec_hdr lsr_hdr; + obd_id lsr_oid; + obd_count lsr_ogen; + __u32 lsr_uid; + __u32 lsr_gid; + __u32 padding; + struct llog_rec_tail lsr_tail; +} __attribute__((packed)); + struct llog_size_change_rec { struct llog_rec_hdr lsc_hdr; struct ll_fid lsc_fid; @@ -1027,4 +1091,18 @@ static inline struct ll_fid *obdo_fid(struct obdo *oa) sizeof(struct llog_cookie)); } +/* qutoa */ +struct qunit_data { + 
__u32 qd_id; + __u32 qd_type; + __u32 qd_count; + __u32 qd_isblk; /* indicating if it's block quota */ +}; +extern void lustre_swab_qdata(struct qunit_data *d); + +typedef enum { + QUOTA_DQACQ = 601, + QUOTA_DQREL = 602, +} quota_cmd_t; + #endif diff --git a/lustre/include/linux/lustre_lib.h b/lustre/include/linux/lustre_lib.h index 380629a..86e0274 100644 --- a/lustre/include/linux/lustre_lib.h +++ b/lustre/include/linux/lustre_lib.h @@ -83,6 +83,10 @@ int target_handle_reconnect(struct lustre_handle *conn, struct obd_export *exp, int target_handle_ping(struct ptlrpc_request *req); void target_committed_to_req(struct ptlrpc_request *req); +/* quotacheck callback, dqacq/dqrel callback handler */ +int target_handle_qc_callback(struct ptlrpc_request *req); +int target_handle_dqacq_callback(struct ptlrpc_request *req); + void target_cancel_recovery_timer(struct obd_device *obd); #define OBD_RECOVERY_TIMEOUT (obd_timeout * 5 * HZ / 2) /* *waves hands* */ @@ -438,9 +442,13 @@ static inline void obd_ioctl_freedata(char *buf, int len) #define OBD_IOC_CLOSE_UUID _IOWR ('f', 147, long) -#define OBD_IOC_LOV_SETSTRIPE _IOW ('f', 154, long) -#define OBD_IOC_LOV_GETSTRIPE _IOW ('f', 155, long) -#define OBD_IOC_LOV_SETEA _IOW ('f', 156, long) +#define OBD_IOC_LOV_SETSTRIPE _IOW ('f', 154, long) +#define OBD_IOC_LOV_GETSTRIPE _IOW ('f', 155, long) +#define OBD_IOC_LOV_SETEA _IOW ('f', 156, long) + +#define OBD_IOC_QUOTACHECK _IOW ('f', 160, int) +#define OBD_IOC_POLL_QUOTACHECK _IOR ('f', 161, struct if_quotacheck *) +#define OBD_IOC_QUOTACTL _IOWR('f', 162, struct if_quotactl *) #define OBD_IOC_MOUNTOPT _IOWR('f', 170, long) diff --git a/lustre/include/linux/lustre_log.h b/lustre/include/linux/lustre_log.h index 9266f13..9864e5d 100644 --- a/lustre/include/linux/lustre_log.h +++ b/lustre/include/linux/lustre_log.h @@ -68,8 +68,14 @@ struct llog_handle { } u; }; +struct llog_fill_rec_data { + obd_id lfd_id; /* object id */ + obd_count lfd_ogen; /* object group */ +}; + /* llog.c 
- general API */ typedef int (*llog_cb_t)(struct llog_handle *, struct llog_rec_hdr *, void *); +typedef int (*llog_fill_rec_cb_t)(struct llog_rec_hdr *rec, void *data); int llog_init_handle(struct llog_handle *handle, int flags, struct obd_uuid *uuid); int llog_process(struct llog_handle *loghandle, llog_cb_t cb, @@ -106,7 +112,7 @@ int llog_cleanup(struct llog_ctxt *); int llog_sync(struct llog_ctxt *ctxt, struct obd_export *exp); int llog_add(struct llog_ctxt *ctxt, struct llog_rec_hdr *rec, struct lov_stripe_md *lsm, struct llog_cookie *logcookies, - int numcookies); + int numcookies, llog_fill_rec_cb_t fill_cb); int llog_cancel(struct llog_ctxt *, struct lov_stripe_md *lsm, int count, struct llog_cookie *cookies, int flags); @@ -116,7 +122,8 @@ int llog_obd_origin_setup(struct obd_device *obd, int index, int llog_obd_origin_cleanup(struct llog_ctxt *ctxt); int llog_obd_origin_add(struct llog_ctxt *ctxt, struct llog_rec_hdr *rec, struct lov_stripe_md *lsm, - struct llog_cookie *logcookies, int numcookies); + struct llog_cookie *logcookies, int numcookies, + llog_fill_rec_cb_t fill_cb); int llog_cat_initialize(struct obd_device *obd, int count); int obd_llog_init(struct obd_device *obd, struct obd_device *disk_obd, @@ -166,7 +173,8 @@ struct llog_operations { int (*lop_cleanup)(struct llog_ctxt *ctxt); int (*lop_add)(struct llog_ctxt *ctxt, struct llog_rec_hdr *rec, struct lov_stripe_md *lsm, - struct llog_cookie *logcookies, int numcookies); + struct llog_cookie *logcookies, int numcookies, + llog_fill_rec_cb_t fill_cb); int (*lop_cancel)(struct llog_ctxt *ctxt, struct lov_stripe_md *lsm, int count, struct llog_cookie *cookies, int flags); int (*lop_connect)(struct llog_ctxt *ctxt, int count, diff --git a/lustre/include/linux/lustre_quota.h b/lustre/include/linux/lustre_quota.h new file mode 100644 index 0000000..7c51557 --- /dev/null +++ b/lustre/include/linux/lustre_quota.h @@ -0,0 +1,177 @@ +/* -*- mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*- + * 
vim:expandtab:shiftwidth=8:tabstop=8: + */ +#ifndef _LUSTRE_QUOTA_H +#define _LUSTRE_QUOTA_H + +#include +#include +#include + +/* XXX disable amdin quotafile delete dquot temporarily */ +#define QFMT_NO_DELETE 1 + +#define QUSG(count, isblk) (isblk ? toqb(count) : count) + +/* If the (quota limit < qunit * slave count), the slave which can't + * acquire qunit should set it's local limit as MIN_QLIMIT */ +#define MIN_QLIMIT 1 + +#ifndef NR_DQHASH +#define NR_DQHASH 45 +#endif + +/* structures to access admin quotafile */ +struct lustre_mem_dqinfo { + unsigned int dqi_bgrace; + unsigned int dqi_igrace; + unsigned long dqi_flags; + unsigned int dqi_blocks; + unsigned int dqi_free_blk; + unsigned int dqi_free_entry; +}; + +struct lustre_quota_info { + struct semaphore qi_sem; + struct file *qi_files[MAXQUOTAS]; + struct lustre_mem_dqinfo qi_info[MAXQUOTAS]; +}; + +struct lustre_dquot { + struct list_head dq_hash; + struct list_head dq_unused; + + /* this semaphore is unused until we implement wb dquot cache */ + struct semaphore dq_sem; + atomic_t dq_refcnt; + + struct lustre_quota_info *dq_info; + loff_t dq_off; + unsigned int dq_id; + int dq_type; + unsigned long dq_flags; + struct mem_dqblk dq_dqb; +}; + +#define QFILE_CHK 1 +#define QFILE_RD_INFO 2 +#define QFILE_WR_INFO 3 +#define QFILE_INIT_INFO 4 +#define QFILE_RD_DQUOT 5 +#define QFILE_WR_DQUOT 6 + +/* admin quotafile operations */ +#if LINUX_VERSION_CODE >= KERNEL_VERSION(2,5,0) +int lustre_check_quota_file(struct lustre_quota_info *lqi, int type); +int lustre_read_quota_info(struct lustre_quota_info *lqi, int type); +int lustre_write_quota_info(struct lustre_quota_info *lqi, int type); +int lustre_read_dquot(struct lustre_dquot *dquot); +int lustre_commit_dquot(struct lustre_dquot *dquot); +int lustre_init_quota_info(struct lustre_quota_info *lqi, int type); + +#else + +#ifndef DQ_FAKE_B +#define DQ_FAKE_B 6 +#endif + +static inline int lustre_check_quota_file(struct lustre_quota_info *lqi, + int type) +{ + 
return 0; +} +static inline int lustre_read_quota_info(struct lustre_quota_info *lqi, + int type) +{ + return 0; +} +static inline int lustre_write_quota_info(struct lustre_quota_info *lqi, + int type) +{ + return 0; +} +static inline int lustre_read_dquot(struct lustre_dquot *dquot) +{ + return 0; +} +static inline int lustre_commit_dquot(struct lustre_dquot *dquot) +{ + return 0; +} +static inline int lustre_init_quota_info(struct lustre_quota_info *lqi, + int type) +{ + return 0; +} +#endif /* KERNEL_VERSION(2,5,0) */ + +/* quota context structures */ +struct obd_device; +typedef int (*dqacq_handler_t) (struct obd_device * obd, struct qunit_data * qd, + int opc); + +struct lustre_quota_ctxt { + struct super_block *lqc_sb; + struct obd_import *lqc_import; + dqacq_handler_t lqc_handler; + unsigned long lqc_flags; + unsigned long lqc_iunit_sz; + unsigned long lqc_itune_sz; + unsigned long lqc_bunit_sz; + unsigned long lqc_btune_sz; +}; + +struct lustre_qunit { + struct list_head lq_hash; + atomic_t lq_refcnt; + struct lustre_quota_ctxt *lq_ctxt; + struct qunit_data lq_data; + unsigned int lq_opc; + struct list_head lq_waiters; +}; + +#if LINUX_VERSION_CODE >= KERNEL_VERSION(2,5,0) +int qctxt_init(struct lustre_quota_ctxt *qctxt, struct super_block *sb, + dqacq_handler_t handler); +void qctxt_cleanup(struct lustre_quota_ctxt *qctxt, int force); +int qctxt_adjust_qunit(struct obd_device *obd, struct lustre_quota_ctxt *qctxt, + uid_t uid, gid_t gid, __u32 isblk); +int qctxt_wait_on_dqacq(struct obd_device *obd, struct lustre_quota_ctxt *qctxt, + uid_t uid, gid_t gid, __u32 isblk); +#else +static inline int qctxt_init(struct lustre_quota_ctxt *qctxt, + struct super_block *sb, dqacq_handler_t handler) +{ + return 0; +} +static inline void qctxt_cleanup(struct lustre_quota_ctxt *qctxt, int force) +{ + return; +} +static inline int qctxt_adjust_qunit(struct obd_device *obd, + struct lustre_quota_ctxt *qctxt, + uid_t uid, gid_t gid, __u32 isblk) +{ + return 0; +} +static 
inline int qctxt_wait_on_dqacq(struct obd_device *obd, + struct lustre_quota_ctxt *qctxt, + uid_t uid, gid_t gid, __u32 isblk) +{ + return 0; +} +#endif /* KERNEL_VERSION(2,5,0) */ + +/* quota check & quotactl */ +#define LUSTRE_ADMIN_QUOTAFILES {\ + "admin_quotafile.usr", /* user admin quotafile */\ + "admin_quotafile.grp" /* group admin quotafile */\ +} + +struct quotacheck_info { + struct completion qi_starting; + struct obd_export *qi_exp; + struct obd_quotactl qi_oqctl; +}; + +#endif /* _LUSTRE_QUOTA_H */ diff --git a/lustre/include/linux/obd.h b/lustre/include/linux/obd.h index 0ac8a9e..4dca21b 100644 --- a/lustre/include/linux/obd.h +++ b/lustre/include/linux/obd.h @@ -30,6 +30,7 @@ #include #include #include +#include /* this is really local to the OSC */ struct loi_oap_pages { @@ -129,6 +130,7 @@ struct obd_async_page_ops { int (*ap_refresh_count)(void *data, int cmd); void (*ap_fill_obdo)(void *data, int cmd, struct obdo *oa); void (*ap_completion)(void *data, int cmd, struct obdo *oa, int rc); + void (*ap_get_ucred)(void *data, struct obd_ucred *ouc); }; /* the `oig' is passed down from a caller of obd rw methods. the callee @@ -222,6 +224,10 @@ struct filter_obd { struct obd_histogram fo_w_discont_blocks; struct obd_histogram fo_r_disk_iosize; struct obd_histogram fo_w_disk_iosize; + + struct lustre_quota_ctxt fo_quota_ctxt; + spinlock_t fo_quotacheck_lock; + atomic_t fo_quotachecking; }; struct mds_server_data; @@ -231,6 +237,11 @@ struct mds_server_data; #define OSC_MAX_DIRTY_DEFAULT 32 #define OSC_MAX_DIRTY_MB_MAX 512 /* totally arbitrary */ +enum { + CL_QUOTACHECKING = 1, + CL_NO_QUOTACHECK +}; + struct mdc_rpc_lock; struct client_obd { struct obd_import *cl_import; @@ -282,6 +293,10 @@ struct client_obd { /* also protected by the poorly named _loi_list_lock lock above */ struct osc_async_rc cl_ar; + + /* used by quotacheck */ + spinlock_t cl_qchk_lock; + int cl_qchk_stat; /* quotacheck stat of the peer */ }; /* Like a client, with some hangers-on. 
Keep mc_client_obd first so that we @@ -330,6 +345,9 @@ struct mds_obd { struct file *mds_lov_objid_filp; unsigned long *mds_client_bitmap; struct semaphore mds_orphan_recovery_sem; + struct lustre_quota_info mds_quota_info; + struct lustre_quota_ctxt mds_quota_ctxt; + atomic_t mds_quotachecking; }; struct echo_obd { @@ -460,18 +478,18 @@ static inline void oti_free_cookies(struct obd_trans_info *oti) /* llog contexts */ enum llog_ctxt_id { - LLOG_CONFIG_ORIG_CTXT = 0, - LLOG_CONFIG_REPL_CTXT = 1, - LLOG_UNLINK_ORIG_CTXT = 2, - LLOG_UNLINK_REPL_CTXT = 3, - LLOG_SIZE_ORIG_CTXT = 4, - LLOG_SIZE_REPL_CTXT = 5, - LLOG_MD_ORIG_CTXT = 6, - LLOG_MD_REPL_CTXT = 7, - LLOG_RD1_ORIG_CTXT = 8, - LLOG_RD1_REPL_CTXT = 9, - LLOG_TEST_ORIG_CTXT = 10, - LLOG_TEST_REPL_CTXT = 11, + LLOG_CONFIG_ORIG_CTXT = 0, + LLOG_CONFIG_REPL_CTXT = 1, + LLOG_MDS_OST_ORIG_CTXT = 2, + LLOG_MDS_OST_REPL_CTXT = 3, + LLOG_SIZE_ORIG_CTXT = 4, + LLOG_SIZE_REPL_CTXT = 5, + LLOG_MD_ORIG_CTXT = 6, + LLOG_MD_REPL_CTXT = 7, + LLOG_RD1_ORIG_CTXT = 8, + LLOG_RD1_REPL_CTXT = 9, + LLOG_TEST_ORIG_CTXT = 10, + LLOG_TEST_REPL_CTXT = 11, LLOG_MAX_CTXTS }; @@ -589,6 +607,8 @@ struct obd_ops { struct lov_stripe_md *ea, struct obd_trans_info *oti); int (*o_setattr)(struct obd_export *exp, struct obdo *oa, struct lov_stripe_md *ea, struct obd_trans_info *oti); + int (*o_setattr_async)(struct obd_export *exp, struct obdo *oa, + struct lov_stripe_md *ea, struct obd_trans_info *oti); int (*o_getattr)(struct obd_export *exp, struct obdo *oa, struct lov_stripe_md *ea); int (*o_getattr_async)(struct obd_export *exp, struct obdo *oa, @@ -691,6 +711,11 @@ struct obd_ops { int (*o_notify)(struct obd_device *obd, struct obd_device *watched, int active); + + /* quota methods */ + int (*o_quotacheck)(struct obd_export *, struct obd_quotactl *); + int (*o_quotactl)(struct obd_export *, struct obd_quotactl *); + /* * NOTE: If adding ops, add another LPROCFS_OBD_OP_INIT() line * to lprocfs_alloc_obd_stats() in 
obdclass/lprocfs_status.c. diff --git a/lustre/include/linux/obd_class.h b/lustre/include/linux/obd_class.h index 960fbe5..e7d1afd 100644 --- a/lustre/include/linux/obd_class.h +++ b/lustre/include/linux/obd_class.h @@ -67,6 +67,8 @@ struct obd_device *class_uuid2obd(struct obd_uuid *uuid); struct obd_device * class_find_client_obd(struct obd_uuid *tgt_uuid, char * typ_name, struct obd_uuid *grp_uuid); +struct obd_device * class_find_client_notype(struct obd_uuid *tgt_uuid, + struct obd_uuid *grp_uuid); struct obd_device * class_devices_in_group(struct obd_uuid *grp_uuid, int *next); @@ -467,6 +469,21 @@ static inline int obd_setattr(struct obd_export *exp, struct obdo *obdo, RETURN(rc); } +static inline int obd_setattr_async(struct obd_export *exp, + struct obdo *obdo, + struct lov_stripe_md *ea, + struct obd_trans_info *oti) +{ + int rc; + ENTRY; + + EXP_CHECK_OP(exp, setattr_async); + OBD_COUNTER_INCREMENT(exp->exp_obd, setattr_async); + + rc = OBP(exp->exp_obd, setattr_async)(exp, obdo, ea, oti); + RETURN(rc); +} + static inline int obd_add_conn(struct obd_import *imp, struct obd_uuid *uuid, int priority) { @@ -983,6 +1000,33 @@ static inline int obd_notify(struct obd_device *obd, return OBP(obd, notify)(obd, watched, active); } +static inline int obd_quotacheck(struct obd_export *exp, + struct obd_quotactl *oqctl) +{ + int rc; + ENTRY; + + EXP_CHECK_OP(exp, quotacheck); + OBD_COUNTER_INCREMENT(exp->exp_obd, quotacheck); + + rc = OBP(exp->exp_obd, quotacheck)(exp, oqctl); + RETURN(rc); +} + +static inline int obd_quotactl(struct obd_export *exp, + struct obd_quotactl *oqctl) +{ + int rc; + ENTRY; + + EXP_CHECK_OP(exp, quotactl); + OBD_COUNTER_INCREMENT(exp->exp_obd, quotactl); + + rc = OBP(exp->exp_obd, quotactl)(exp, oqctl); + RETURN(rc); +} + + static inline int obd_register_observer(struct obd_device *obd, struct obd_device *observer) { @@ -1010,6 +1054,11 @@ static inline struct obdo *obdo_alloc(void) return oa; } +/* qunit hash stuff */ +extern 
kmem_cache_t *qunit_cachep; +extern struct list_head qunit_hash[]; +extern spinlock_t qunit_hash_lock; + static inline void obdo_free(struct obdo *oa) { OBD_SLAB_FREE(oa, obdo_cachep, sizeof(*oa)); @@ -1042,4 +1091,5 @@ int class_add_uuid(char *uuid, __u64 nid, __u32 nal); int class_del_uuid (char *uuid); void class_init_uuidlist(void); void class_exit_uuidlist(void); + #endif /* __LINUX_OBD_CLASS_H */ diff --git a/lustre/include/linux/obd_support.h b/lustre/include/linux/obd_support.h index 4207a5e..2c54309 100644 --- a/lustre/include/linux/obd_support.h +++ b/lustre/include/linux/obd_support.h @@ -87,6 +87,9 @@ extern wait_queue_head_t obd_race_waitq; #define OBD_FAIL_MDS_PAUSE_OPEN 0x129 #define OBD_FAIL_MDS_STATFS_LCW_SLEEP 0x12a #define OBD_FAIL_MDS_OPEN_CREATE 0x12b +#define OBD_FAIL_MDS_OST_SETATTR 0x12c +#define OBD_FAIL_MDS_QUOTACHECK_NET 0x12d +#define OBD_FAIL_MDS_QUOTACTL_NET 0x12e #define OBD_FAIL_OST 0x200 #define OBD_FAIL_OST_CONNECT_NET 0x201 @@ -112,6 +115,8 @@ extern wait_queue_head_t obd_race_waitq; #define OBD_FAIL_OST_ENOSPC 0x215 #define OBD_FAIL_OST_EROFS 0x216 #define OBD_FAIL_OST_ENOENT 0x217 +#define OBD_FAIL_OST_QUOTACHECK_NET 0x218 +#define OBD_FAIL_OST_QUOTACTL_NET 0x219 #define OBD_FAIL_LDLM 0x300 #define OBD_FAIL_LDLM_NAMESPACE_NEW 0x301 @@ -145,6 +150,7 @@ extern wait_queue_head_t obd_race_waitq; #define OBD_FAIL_OBD_PING_NET 0x600 #define OBD_FAIL_OBD_LOG_CANCEL_NET 0x601 #define OBD_FAIL_OBD_LOGD_NET 0x602 +#define OBD_FAIL_OBD_QC_CALLBACK_NET 0x603 #define OBD_FAIL_TGT_REPLY_NET 0x700 #define OBD_FAIL_TGT_CONN_RACE 0x701 diff --git a/lustre/include/lustre/liblustreapi.h b/lustre/include/lustre/liblustreapi.h index ab74c1d..6527f40 100644 --- a/lustre/include/lustre/liblustreapi.h +++ b/lustre/include/lustre/liblustreapi.h @@ -19,5 +19,8 @@ extern int llapi_target_check(int num_types, char **obd_types, char *dir); extern int llapi_catinfo(char *dir, char *keyword, char *node_name); extern int llapi_lov_get_uuids(int fd, struct 
obd_uuid *uuidp, int *ost_count); extern int llapi_is_lustre_mnttype(char *type); - +extern int llapi_quotacheck(char *mnt, int check_type); +extern int llapi_poll_quotacheck(char *mnt, struct if_quotacheck *qchk); +extern int llapi_quotactl(char *mnt, struct if_quotactl *qctl); +extern int llapi_quotachog(char *path, int flag); #endif diff --git a/lustre/include/lustre/lustre_user.h b/lustre/include/lustre/lustre_user.h index 9de6b72..6d1faa3 100644 --- a/lustre/include/lustre/lustre_user.h +++ b/lustre/include/lustre/lustre_user.h @@ -9,6 +9,7 @@ #ifndef _LUSTRE_USER_H #define _LUSTRE_USER_H #include +#include #ifdef __KERNEL__ #include #else @@ -37,6 +38,9 @@ #define LL_IOC_RECREATE_OBJ _IOW ('f', 157, long) #define LL_IOC_GROUP_LOCK _IOW ('f', 158, long) #define LL_IOC_GROUP_UNLOCK _IOW ('f', 159, long) +#define LL_IOC_QUOTACHECK _IOW ('f', 160, int) +#define LL_IOC_POLL_QUOTACHECK _IOR ('f', 161, struct if_quotacheck *) +#define LL_IOC_QUOTACTL _IOWR('f', 162, struct if_quotactl *) #define IOC_MDC_TYPE 'i' #define IOC_MDC_GETSTRIPE _IOWR(IOC_MDC_TYPE, 21, struct lov_mds_md *) @@ -118,4 +122,47 @@ static inline void obd_str2uuid(struct obd_uuid *uuid, char *tmp) uuid->uuid[sizeof(*uuid) - 1] = '\0'; } +#define UGQUOTA 2 /* set both USRQUOTA and GRPQUOTA */ + +#define QFMT_LDISKFS 2 /* QFMT_VFS_V0(2), quota format for ldiskfs */ + +struct if_quotacheck { + char obd_type[10]; + struct obd_uuid obd_uuid; + int stat; +}; + +#ifndef __KERNEL__ +/* XXX: these two structs should be in /usr/include/linux/quota.h */ +struct if_dqinfo { + __u64 dqi_bgrace; + __u64 dqi_igrace; + __u32 dqi_flags; + __u32 dqi_valid; +}; + +struct if_dqblk { + __u64 dqb_bhardlimit; + __u64 dqb_bsoftlimit; + __u64 dqb_curspace; + __u64 dqb_ihardlimit; + __u64 dqb_isoftlimit; + __u64 dqb_curinodes; + __u64 dqb_btime; + __u64 dqb_itime; + __u32 dqb_valid; +}; +#endif + +struct if_quotactl { + int qc_cmd; + int qc_type; + int qc_id; + int qc_stat; + struct if_dqinfo qc_dqinfo; + struct if_dqblk 
qc_dqblk; + char obd_type[10]; + struct obd_uuid obd_uuid; +}; + #endif /* _LUSTRE_USER_H */ diff --git a/lustre/kernel_patches/patches/kallsyms-2.4-bgl.patch b/lustre/kernel_patches/patches/kallsyms-2.4-bgl.patch index 8f7188a..6ec26cd 100644 --- a/lustre/kernel_patches/patches/kallsyms-2.4-bgl.patch +++ b/lustre/kernel_patches/patches/kallsyms-2.4-bgl.patch @@ -252,7 +252,7 @@ Index: linux-bgl/kernel/kallsyms.c + Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. + */ + -+#ident "$Id: kallsyms-2.4-bgl.patch,v 1.1.4.1 2004/10/29 00:51:21 jacob Exp $" ++#ident "$Id: kallsyms-2.4-bgl.patch,v 1.1.20.1 2005/03/24 22:50:28 jacob Exp $" + +/* + This code uses the list of all kernel and module symbols to :- @@ -568,7 +568,7 @@ Index: linux-bgl/include/linux/kallsyms.h + Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. + */ + -+#ident "$Id: kallsyms-2.4-bgl.patch,v 1.1.4.1 2004/10/29 00:51:21 jacob Exp $" ++#ident "$Id: kallsyms-2.4-bgl.patch,v 1.1.20.1 2005/03/24 22:50:28 jacob Exp $" + +#ifndef MODUTILS_KALLSYMS_H +#define MODUTILS_KALLSYMS_H 1 diff --git a/lustre/ldiskfs/Makefile.in b/lustre/ldiskfs/Makefile.in index e2aff01..be51da2 100644 --- a/lustre/ldiskfs/Makefile.in +++ b/lustre/ldiskfs/Makefile.in @@ -1,6 +1,6 @@ default: all -MODULES := ldiskfs +MODULES := ldiskfs quotafmt_test # copy makefile over to not break patches ext3_extra := $(wildcard @LINUX@/fs/ext3/Makefile) @@ -11,9 +11,13 @@ linux_headers := $(wildcard @LINUX@/include/linux/ext3*.h) ext3_sources := $(filter-out %.mod.c,$(wildcard @LINUX@/fs/ext3/*.c)) new_sources := iopen.c iopen.h extents.c mballoc.c new_headers := ext3_extents.h -ldiskfs_sources := $(notdir $(ext3_sources) $(ext3_headers)) $(new_sources) $(new_headers) +quotafmt_sources := lustre_quota_fmt.c +quotafmt_headers := lustre_quota_fmt.h +ldiskfs_patched_sources := $(notdir $(ext3_sources) $(ext3_headers)) $(new_sources) $(new_headers) +ldiskfs_sources := $(ldiskfs_patched_sources) $(quotafmt_sources) 
$(quotafmt_headers) ldiskfs-objs := $(filter %.o,$(ldiskfs_sources:.c=.o)) +quotafmt-objs := quotafmt_test.o EXTRA_PRE_CFLAGS := -I@LINUX@/fs -I@LUSTRE@ -I@LUSTRE@/ldiskfs diff --git a/lustre/ldiskfs/autoMakefile.am b/lustre/ldiskfs/autoMakefile.am index a31e01a..4f9e784 100644 --- a/lustre/ldiskfs/autoMakefile.am +++ b/lustre/ldiskfs/autoMakefile.am @@ -6,7 +6,7 @@ endif ldiskfs_linux_headers := $(addprefix linux/,$(subst ext3,ldiskfs,$(notdir $(linux_headers)))) -$(filter %.c,$(ldiskfs_sources)): sources $(ldiskfs_linux_headers) $(filter %.h,$(ldiskfs_sources)) +$(filter %.c,$(ldiskfs_patched_sources)): sources $(ldiskfs_linux_headers) $(filter %.h,$(ldiskfs_patched_sources)) ldiskfs_sed_flags = \ -e "s/dx_hash_info/ext3_dx_hash_info/g" \ @@ -71,7 +71,9 @@ foo-check: @echo "ldiskfs_LDADD: $(ldiskfs_LDADD)" MOSTLYCLEANFILES = *.o *.ko *.mod.c -CLEANFILES = sources *.c *.h +CLEANFILES = sources $(notdir $(linux_headers) $(ext3_headers) $(ext3_sources) $(new_sources) $(new_headers)) + +EXTRA_DIST := lustre_quota_fmt.c lustre_quota_fmt.h quotafmt_test.c clean: clean-am rm -rf linux linux-stage diff --git a/lustre/ldlm/ldlm_lib.c b/lustre/ldlm/ldlm_lib.c index 0d7cc6f..026e92f 100644 --- a/lustre/ldlm/ldlm_lib.c +++ b/lustre/ldlm/ldlm_lib.c @@ -338,6 +338,9 @@ int client_obd_setup(struct obd_device *obddev, obd_count len, void *buf) cli->cl_mgmtcli_obd = mgmt_obd; } + spin_lock_init(&cli->cl_qchk_lock); + cli->cl_qchk_stat = CL_NO_QUOTACHECK; + RETURN(rc); err_import: @@ -1352,4 +1355,61 @@ void target_committed_to_req(struct ptlrpc_request *req) CDEBUG(D_INFO, "last_committed "LPU64", xid "LPU64"\n", obd->obd_last_committed, req->rq_xid); } + +int target_handle_qc_callback(struct ptlrpc_request *req) +{ + struct obd_quotactl *oqctl; + struct client_obd *cli = &req->rq_export->exp_obd->u.cli; + + oqctl = lustre_swab_reqbuf(req, 0, sizeof(*oqctl), + lustre_swab_obd_quotactl); + + spin_lock(&cli->cl_qchk_lock); + cli->cl_qchk_stat = oqctl->qc_stat; + 
spin_unlock(&cli->cl_qchk_lock); + + return 0; +} + +int target_handle_dqacq_callback(struct ptlrpc_request *req) +{ + struct obd_device *obd = req->rq_export->exp_obd; + struct obd_device *master_obd; + struct lustre_quota_ctxt *qctxt; + struct qunit_data *qdata, *rep; + int rc = 0, repsize = sizeof(struct qunit_data); + ENTRY; + + rc = lustre_pack_reply(req, 1, &repsize, NULL); + if (rc) { + CERROR("packing reply failed!: rc = %d\n", rc); + RETURN(rc); + } + rep = lustre_msg_buf(req->rq_repmsg, 0, sizeof(*rep)); + LASSERT(rep); + + qdata = lustre_swab_reqbuf(req, 0, sizeof(*qdata), lustre_swab_qdata); + if (qdata == NULL) { + CERROR("unpacking request buffer failed!"); + RETURN(-EPROTO); + } + + /* we use the observer */ + LASSERT(obd->obd_observer && obd->obd_observer->obd_observer); + master_obd = obd->obd_observer->obd_observer; + qctxt = &master_obd->u.mds.mds_quota_ctxt; + + LASSERT(qctxt->lqc_handler); + rc = qctxt->lqc_handler(master_obd, qdata, req->rq_reqmsg->opc); + if (rc && rc != -EDQUOT) + CERROR("dqacq failed! 
(rc:%d)\n", rc); + + /* the qd_count might be changed in lqc_handler */ + memcpy(rep, qdata, sizeof(*rep)); + req->rq_status = rc; + rc = ptlrpc_reply(req); + + RETURN(rc); +} + EXPORT_SYMBOL(target_committed_to_req); diff --git a/lustre/ldlm/ldlm_lockd.c b/lustre/ldlm/ldlm_lockd.c index f74881b..450e2cf 100644 --- a/lustre/ldlm/ldlm_lockd.c +++ b/lustre/ldlm/ldlm_lockd.c @@ -1142,6 +1142,16 @@ static int ldlm_callback_handler(struct ptlrpc_request *req) rc = llog_origin_handle_cancel(req); ldlm_callback_reply(req, rc); RETURN(0); + case OBD_QC_CALLBACK: + OBD_FAIL_RETURN(OBD_FAIL_OBD_QC_CALLBACK_NET, 0); + rc = target_handle_qc_callback(req); + ldlm_callback_reply(req, rc); + RETURN(0); + case QUOTA_DQACQ: + case QUOTA_DQREL: + /* reply in handler */ + rc = target_handle_dqacq_callback(req); + RETURN(0); case LLOG_ORIGIN_HANDLE_CREATE: OBD_FAIL_RETURN(OBD_FAIL_OBD_LOGD_NET, 0); rc = llog_origin_handle_create(req); diff --git a/lustre/llite/dir.c b/lustre/llite/dir.c index 43f884e..63e09bc 100644 --- a/lustre/llite/dir.c +++ b/lustre/llite/dir.c @@ -367,6 +367,27 @@ done: RETURN(rc); } +#define Q_CONV(tgt, src, member) (tgt)->member = (src)->member + +#define QCTLCONV(tgt, src) \ +do { \ + Q_CONV(tgt, src, qc_cmd); \ + Q_CONV(tgt, src, qc_type); \ + Q_CONV(tgt, src, qc_id); \ + Q_CONV(tgt, src, qc_stat); \ + Q_CONV(tgt, src, qc_dqinfo.dqi_bgrace); \ + Q_CONV(tgt, src, qc_dqinfo.dqi_igrace); \ + Q_CONV(tgt, src, qc_dqinfo.dqi_flags); \ + Q_CONV(tgt, src, qc_dqblk.dqb_ihardlimit); \ + Q_CONV(tgt, src, qc_dqblk.dqb_isoftlimit); \ + Q_CONV(tgt, src, qc_dqblk.dqb_curinodes); \ + Q_CONV(tgt, src, qc_dqblk.dqb_bhardlimit); \ + Q_CONV(tgt, src, qc_dqblk.dqb_bsoftlimit); \ + Q_CONV(tgt, src, qc_dqblk.dqb_curspace); \ + Q_CONV(tgt, src, qc_dqblk.dqb_btime); \ + Q_CONV(tgt, src, qc_dqblk.dqb_itime); \ +} while (0) + static int ll_dir_ioctl(struct inode *inode, struct file *file, unsigned int cmd, unsigned long arg) { @@ -654,6 +675,171 @@ static int ll_dir_ioctl(struct inode 
*inode, struct file *file, obd_ioctl_freedata(buf, len); RETURN(rc); } + case OBD_IOC_QUOTACHECK: { + struct obd_quotactl oqctl = { 0, }; + int rc, error = 0; + + if (!capable(CAP_SYS_ADMIN)) + RETURN(-EPERM); + + oqctl.qc_type = arg; + rc = obd_quotacheck(sbi->ll_mdc_exp, &oqctl); + if (rc < 0) { + CDEBUG(D_INFO, "mdc_quotacheck failed: rc %d\n", rc); + error = rc; + } + + rc = obd_quotacheck(sbi->ll_osc_exp, &oqctl); + if (rc < 0) + CDEBUG(D_INFO, "osc_quotacheck failed: rc %d\n", rc); + + if (error) + rc = error; + return rc; + } + case OBD_IOC_POLL_QUOTACHECK: { + struct if_quotacheck check; + int rc; + + if (!capable(CAP_SYS_ADMIN)) + RETURN(-EPERM); + + rc = obd_iocontrol(cmd, sbi->ll_mdc_exp, 0, (void *)&check, + NULL); + if (check.stat == -ENODATA) + rc = check.stat; + if (rc) { + CDEBUG(D_QUOTA, "mdc ioctl %d failed: rc %d\n", + cmd, check.stat); + if (copy_to_user((void *)arg, &check, sizeof(check))) + RETURN(-EFAULT); + RETURN(rc); + } + + rc = obd_iocontrol(cmd, sbi->ll_osc_exp, 0, (void *)&check, + NULL); + if (check.stat == -ENODATA) + rc = check.stat; + if (rc) { + CDEBUG(D_QUOTA, "osc ioctl %d failed: rc %d\n", + cmd, rc); + if (copy_to_user((void *)arg, &check, sizeof(check))) + RETURN(-EFAULT); + RETURN(rc); + } + + RETURN(0); + } + case OBD_IOC_QUOTACTL: { + struct if_quotactl qctl; + struct obd_quotactl oqctl; + + int cmd, type, id, rc = 0, error = 0; + + if (copy_from_user(&qctl, (void *)arg, sizeof(qctl))) + RETURN(-EFAULT); + + cmd = qctl.qc_cmd; + type = qctl.qc_type; + id = qctl.qc_id; + switch (cmd) { + case Q_QUOTAON: + case Q_QUOTAOFF: + case Q_SETQUOTA: + case Q_SETINFO: + if (!capable(CAP_SYS_ADMIN)) + RETURN(-EPERM); + break; + case Q_GETQUOTA: + if (((type == USRQUOTA && current->euid != id) || + (type == GRPQUOTA && !in_egroup_p(id))) && + !capable(CAP_SYS_ADMIN)) + RETURN(-EPERM); + break; + case Q_GETINFO: + break; + default: + RETURN(-EINVAL); + } + + QCTLCONV(&oqctl, &qctl); + + if (qctl.obd_uuid.uuid[0]) { + struct obd_device 
*obd; + struct obd_uuid *uuid = &qctl.obd_uuid; + + if (cmd == Q_GETINFO) + oqctl.qc_cmd = Q_GETOINFO; + else if (cmd == Q_GETQUOTA) + oqctl.qc_cmd = Q_GETOQUOTA; + else + RETURN(-EINVAL); + + rc = -ENOENT; + obd = class_find_client_notype(uuid, + &sbi->ll_osc_exp->exp_obd->obd_uuid); + if (!obd) + RETURN(rc); + + if (sbi->ll_mdc_exp->exp_obd == obd) { + rc = obd_quotactl(sbi->ll_mdc_exp, &oqctl); + } else { + int i; + struct obd_export *exp; + struct lov_obd *lov = &sbi->ll_osc_exp-> + exp_obd->u.lov; + + for (i = 0; i < lov->desc.ld_tgt_count; i++) { + exp = lov->tgts[i].ltd_exp; + + if (!lov->tgts[i].active) + continue; + + if (exp->exp_obd == obd) { + rc = obd_quotactl(exp, &oqctl); + break; + } + } + } + + QCTLCONV(&qctl, &oqctl); + + if (copy_to_user((void *)arg, &qctl, sizeof(qctl))) + RETURN(-EFAULT); + + RETURN(rc); + } + + if (cmd == Q_SETQUOTA) + oqctl.qc_dqblk.dqb_valid = QIF_LIMITS; + + rc = obd_quotactl(sbi->ll_mdc_exp, &oqctl); + if (rc) { + if (rc == -EBUSY && cmd == Q_QUOTAON) + error = rc; + else + RETURN(rc); + } + + if (cmd == Q_QUOTAON || cmd == Q_QUOTAOFF) { + rc = obd_quotactl(sbi->ll_osc_exp, &oqctl); + if (rc) { + if (rc != -EBUSY && cmd == Q_QUOTAON) { + oqctl.qc_cmd = Q_QUOTAOFF; + obd_quotactl(sbi->ll_mdc_exp, &oqctl); + obd_quotactl(sbi->ll_osc_exp, &oqctl); + } + RETURN(rc); + } + } + + QCTLCONV(&qctl, &oqctl); + + if (copy_to_user((void *)arg, &qctl, sizeof(qctl))) + return -EFAULT; + + RETURN(rc?:error); + } default: return obd_iocontrol(cmd, sbi->ll_osc_exp,0,NULL,(void *)arg); } diff --git a/lustre/llite/llite_internal.h b/lustre/llite/llite_internal.h index 50c6d11..4aa8183 100644 --- a/lustre/llite/llite_internal.h +++ b/lustre/llite/llite_internal.h @@ -215,6 +215,8 @@ struct ll_async_page { llap_origin:3, llap_ra_used:1; struct list_head llap_pglist_item; + /* user credit information for oss enforcement quota */ + struct obd_ucred llap_ouc; }; enum { diff --git a/lustre/llite/rw.c b/lustre/llite/rw.c index e6d8a2a..0b008a5 
100644 --- a/lustre/llite/rw.c +++ b/lustre/llite/rw.c @@ -347,7 +347,8 @@ void ll_inode_fill_obdo(struct inode *inode, int cmd, struct obdo *oa) mdc_pack_fid(obdo_fid(oa), inode->i_ino, 0, inode->i_mode); oa->o_easize = ll_i2info(inode)->lli_io_epoch; - valid_flags |= OBD_MD_FLMTIME | OBD_MD_FLCTIME; + valid_flags |= OBD_MD_FLMTIME | OBD_MD_FLCTIME | + OBD_MD_FLUID | OBD_MD_FLGID; } obdo_from_inode(oa, inode, valid_flags); @@ -368,11 +369,26 @@ static void ll_ap_fill_obdo(void *data, int cmd, struct obdo *oa) EXIT; } +static void ll_ap_get_ucred(void *data, struct obd_ucred *ouc) +{ + struct ll_async_page *llap; + + llap = llap_from_cookie(data); + if (IS_ERR(llap)) { + EXIT; + return; + } + + memcpy(ouc, &llap->llap_ouc, sizeof(*ouc)); + EXIT; +} + static struct obd_async_page_ops ll_async_page_ops = { .ap_make_ready = ll_ap_make_ready, .ap_refresh_count = ll_ap_refresh_count, .ap_fill_obdo = ll_ap_fill_obdo, .ap_completion = ll_ap_completion, + .ap_get_ucred = ll_ap_get_ucred, }; struct ll_async_page *llap_cast_private(struct page *page) @@ -520,6 +536,7 @@ struct ll_async_page *llap_from_page(struct page *page, unsigned origin) RETURN(ERR_PTR(-ENOMEM)); llap->llap_magic = LLAP_MAGIC; llap->llap_cookie = (void *)llap + size_round(sizeof(*llap)); + rc = obd_prep_async_page(exp, ll_i2info(inode)->lli_smd, NULL, page, (obd_off)page->index << PAGE_SHIFT, &ll_async_page_ops, llap, &llap->llap_cookie); @@ -624,6 +641,7 @@ int ll_commit_write(struct file *file, struct page *page, unsigned from, struct lov_stripe_md *lsm = lli->lli_smd; struct obd_export *exp; struct ll_async_page *llap; + struct ll_uctxt ctxt; loff_t size; int rc = 0; ENTRY; @@ -643,6 +661,13 @@ int ll_commit_write(struct file *file, struct page *page, unsigned from, if (exp == NULL) RETURN(-EINVAL); + /* set user credit information for this page */ + llap->llap_ouc.ouc_fsuid = current->fsuid; + llap->llap_ouc.ouc_fsgid = current->fsgid; + llap->llap_ouc.ouc_cap = current->cap_effective; + 
ll_i2uctxt(&ctxt, inode, NULL); + llap->llap_ouc.ouc_suppgid1 = ctxt.gid1; + /* queue a write for some time in the future the first time we * dirty the page */ if (!PageDirty(page)) { diff --git a/lustre/lov/lov_log.c b/lustre/lov/lov_log.c index 7a52d72..6537be4 100644 --- a/lustre/lov/lov_log.c +++ b/lustre/lov/lov_log.c @@ -58,34 +58,31 @@ * Unset cookies should be all-zero (which will never occur naturally). */ static int lov_llog_origin_add(struct llog_ctxt *ctxt, struct llog_rec_hdr *rec, struct lov_stripe_md *lsm, - struct llog_cookie *logcookies, int numcookies) + struct llog_cookie *logcookies, int numcookies, + llog_fill_rec_cb_t fill_cb) { struct obd_device *obd = ctxt->loc_obd; struct lov_obd *lov = &obd->u.lov; struct lov_oinfo *loi; - struct llog_unlink_rec *lur; int i, rc = 0; ENTRY; - OBD_ALLOC(lur, sizeof(*lur)); - if (!lur) - RETURN(-ENOMEM); - lur->lur_hdr.lrh_len = lur->lur_tail.lrt_len = sizeof(*lur); - lur->lur_hdr.lrh_type = MDS_UNLINK_REC; - LASSERT(logcookies && numcookies >= lsm->lsm_stripe_count); for (i = 0,loi = lsm->lsm_oinfo; i < lsm->lsm_stripe_count; i++,loi++) { struct obd_device *child = lov->tgts[loi->loi_ost_idx].ltd_exp->exp_obd; struct llog_ctxt *cctxt = llog_get_context(child, ctxt->loc_idx); + struct llog_fill_rec_data data; + + /* fill mds unlink/setattr log record */ + data.lfd_id = loi->loi_id; + data.lfd_ogen = loi->loi_gr; + fill_cb(rec, &data); - lur->lur_oid = loi->loi_id; - lur->lur_ogen = loi->loi_gr; - rc += llog_add(cctxt, &lur->lur_hdr, NULL, logcookies + rc, - numcookies - rc); + rc += llog_add(cctxt, rec, NULL, logcookies + rc, + numcookies - rc, fill_cb); } - OBD_FREE(lur, sizeof(*lur)); RETURN(rc); } @@ -155,7 +152,7 @@ static int lov_llog_repl_cancel(struct llog_ctxt *ctxt, struct lov_stripe_md *ls RETURN(rc); } -static struct llog_operations lov_unlink_orig_logops = { +static struct llog_operations lov_mds_ost_orig_logops = { lop_add: lov_llog_origin_add, lop_connect: lov_llog_origin_connect }; @@ -172,8 
+169,8 @@ int lov_llog_init(struct obd_device *obd, struct obd_device *tgt, int i, rc = 0; ENTRY; - rc = llog_setup(obd, LLOG_UNLINK_ORIG_CTXT, tgt, 0, NULL, - &lov_unlink_orig_logops); + rc = llog_setup(obd, LLOG_MDS_OST_ORIG_CTXT, tgt, 0, NULL, + &lov_mds_ost_orig_logops); if (rc) RETURN(rc); @@ -205,7 +202,7 @@ int lov_llog_finish(struct obd_device *obd, int count) /* cleanup our llogs only if the ctxts have been setup * (client lov doesn't setup, mds lov does). */ - ctxt = llog_get_context(obd, LLOG_UNLINK_ORIG_CTXT); + ctxt = llog_get_context(obd, LLOG_MDS_OST_ORIG_CTXT); if (ctxt) rc = llog_cleanup(ctxt); diff --git a/lustre/lov/lov_obd.c b/lustre/lov/lov_obd.c index 05bc98c..3f212f3 100644 --- a/lustre/lov/lov_obd.c +++ b/lustre/lov/lov_obd.c @@ -1001,6 +1001,55 @@ static int lov_setattr(struct obd_export *exp, struct obdo *src_oa, RETURN(rc); } +static int lov_setattr_async(struct obd_export *exp, struct obdo *src_oa, + struct lov_stripe_md *lsm, struct obd_trans_info *oti) +{ + struct lov_obd *lov; + struct lov_oinfo *loi = NULL; + int rc = 0, err; + obd_id objid = src_oa->o_id; + int i; + ENTRY; + + ASSERT_LSM_MAGIC(lsm); + LASSERT(oti); + if (src_oa->o_valid & OBD_MD_FLCOOKIE) + LASSERT(oti->oti_logcookies); + + if (!exp || !exp->exp_obd) + RETURN(-ENODEV); + + /* support OBD_MD_FLUID, OBD_MD_FLGID and OBD_MD_FLCOOKIE now */ + LASSERT(!(src_oa->o_valid & ~(OBD_MD_FLID | OBD_MD_FLUID | + OBD_MD_FLGID| OBD_MD_FLCOOKIE))); + lov = &exp->exp_obd->u.lov; + + loi = lsm->lsm_oinfo; + for (i = 0; i < lsm->lsm_stripe_count; i++, loi++) { + if (lov->tgts[loi->loi_ost_idx].active == 0) { + CDEBUG(D_HA, "lov idx %d inactive\n", loi->loi_ost_idx); + goto next; + } + + src_oa->o_id = loi->loi_id; + /* do chown/chgrp on OST asynchronously */ + err = obd_setattr_async(lov->tgts[loi->loi_ost_idx].ltd_exp, + src_oa, NULL, oti); + if (err) { + CERROR("error: setattr objid "LPX64" subobj " + LPX64" on OST idx %d: rc = %d\n", + objid, src_oa->o_id, i, err); + if (!rc) + rc = 
err; + } + next: + if (src_oa->o_valid & OBD_MD_FLCOOKIE) + oti->oti_logcookies++; + } + + RETURN(rc); +} + /* FIXME: maybe we'll just make one node the authoritative attribute node, then * we can send this 'punch' to just the authoritative node and the nodes * that the punch will affect. */ @@ -1253,11 +1302,19 @@ static void lov_ap_completion(void *data, int cmd, struct obdo *oa, int rc) lap->lap_caller_ops->ap_completion(lap->lap_caller_data, cmd, oa, rc); } +static void lov_ap_get_ucred(void *data, struct obd_ucred *ouc) +{ + struct lov_async_page *lap = LAP_FROM_COOKIE(data); + + lap->lap_caller_ops->ap_get_ucred(lap->lap_caller_data, ouc); +} + static struct obd_async_page_ops lov_async_page_ops = { .ap_make_ready = lov_ap_make_ready, .ap_refresh_count = lov_ap_refresh_count, .ap_fill_obdo = lov_ap_fill_obdo, .ap_completion = lov_ap_completion, + .ap_get_ucred = lov_ap_get_ucred, }; int lov_prep_async_page(struct obd_export *exp, struct lov_stripe_md *lsm, @@ -2093,6 +2150,64 @@ int lov_complete_many(struct obd_export *exp, struct lov_stripe_md *lsm, } #endif +static int lov_quotacheck(struct obd_export *exp, struct obd_quotactl *oqctl) +{ + struct obd_device *obd = class_exp2obd(exp); + struct lov_obd *lov = &obd->u.lov; + int i, rc = 0; + ENTRY; + + for (i = 0; i < lov->desc.ld_tgt_count; i++) { + int err; + + if (!lov->tgts[i].active) { + CDEBUG(D_HA, "lov idx %d inactive\n", i); + continue; + } + + err = obd_quotacheck(lov->tgts[i].ltd_exp, oqctl); + if (err) { + if (lov->tgts[i].active && !rc) + rc = err; + continue; + } + } + + RETURN(rc); +} + +static int lov_quotactl(struct obd_export *exp, struct obd_quotactl *oqctl) +{ + struct obd_device *obd = class_exp2obd(exp); + struct lov_obd *lov = &obd->u.lov; + __u64 curspace = oqctl->qc_dqblk.dqb_curspace; + int i, rc = 0; + ENTRY; + + for (i = 0; i < lov->desc.ld_tgt_count; i++) { + int err; + + if (!lov->tgts[i].active) { + CDEBUG(D_HA, "lov idx %d inactive\n", i); + continue; + } + + err = 
obd_quotactl(lov->tgts[i].ltd_exp, oqctl); + if (err) { + if (lov->tgts[i].active && !rc) + rc = err; + continue; + } + + if (oqctl->qc_cmd == Q_GETQUOTA) + curspace += oqctl->qc_dqblk.dqb_curspace; + } + + if (oqctl->qc_cmd == Q_GETQUOTA) + oqctl->qc_dqblk.dqb_curspace = curspace; + RETURN(rc); +} + struct obd_ops lov_obd_ops = { .o_owner = THIS_MODULE, .o_setup = lov_setup, @@ -2108,6 +2223,7 @@ struct obd_ops lov_obd_ops = { .o_getattr = lov_getattr, .o_getattr_async = lov_getattr_async, .o_setattr = lov_setattr, + .o_setattr_async = lov_setattr_async, .o_brw = lov_brw, .o_brw_async = lov_brw_async, .o_prep_async_page = lov_prep_async_page, @@ -2131,6 +2247,8 @@ struct obd_ops lov_obd_ops = { .o_llog_init = lov_llog_init, .o_llog_finish = lov_llog_finish, .o_notify = lov_notify, + .o_quotacheck = lov_quotacheck, + .o_quotactl = lov_quotactl, }; int __init lov_init(void) diff --git a/lustre/lvfs/Makefile.in b/lustre/lvfs/Makefile.in index 791e48d..ac5a8a2 100644 --- a/lustre/lvfs/Makefile.in +++ b/lustre/lvfs/Makefile.in @@ -1,6 +1,8 @@ -MODULES := lvfs fsfilt_@BACKINGFS@ +MODULES := lvfs fsfilt_@BACKINGFS@ quotactl_test quotacheck_test lvfs-objs := lvfs_common.o lvfs_linux.o fsfilt.o +quotactl-objs := quotactl_test.o +quotacheck-objs := quotacheck_test.o ifeq ($(PATCHLEVEL),6) fsfilt_@BACKINGFS@-objs := fsfilt-@BACKINGFS@.o diff --git a/lustre/lvfs/autoMakefile.am b/lustre/lvfs/autoMakefile.am index 0b642f7..814d299 100644 --- a/lustre/lvfs/autoMakefile.am +++ b/lustre/lvfs/autoMakefile.am @@ -42,7 +42,8 @@ sources: endif # MODULES DIST_SOURCES = fsfilt.c fsfilt_ext3.c fsfilt_reiserfs.c lvfs_common.c \ - lvfs_internal.h lvfs_linux.c lvfs_userfs.c + lvfs_internal.h lvfs_linux.c lvfs_userfs.c \ + quotacheck_test.c quotactl_test.c MOSTLYCLEANFILES = *.o *.ko *.mod.c CLEANFILES = fsfilt-*.c fsfilt_ldiskfs.c fsfilt_extN.c sources diff --git a/lustre/lvfs/fsfilt_ext3.c b/lustre/lvfs/fsfilt_ext3.c index cd6d693..eb890e5 100644 --- a/lustre/lvfs/fsfilt_ext3.c +++
b/lustre/lvfs/fsfilt_ext3.c @@ -36,6 +36,9 @@ #include #include #include +#include +#include +#include #if (LINUX_VERSION_CODE < KERNEL_VERSION(2,5,0)) #include #else @@ -46,6 +49,7 @@ #include #include #include +#include #if (LINUX_VERSION_CODE < KERNEL_VERSION(2,5,0)) #include #endif @@ -148,6 +152,8 @@ static void *fsfilt_ext3_start(struct inode *inode, int op, void *desc_private, case FSFILT_OP_SETATTR: /* Setattr on inode */ nblocks += 1; + nblocks += EXT3_INDEX_EXTRA_TRANS_BLOCKS + + EXT3_DATA_TRANS_BLOCKS; break; case FSFILT_OP_CANCEL_UNLINK: /* blocks for log header bitmap update OR @@ -1171,6 +1177,549 @@ static int fsfilt_ext3_get_op_len(int op, struct fsfilt_objinfo *fso, int logs) return 0; } +static const char *op_quotafile[] = { "aquota.user", "aquota.group" }; + +static int fsfilt_ext3_quotactl(struct super_block *sb, + struct obd_quotactl *oqc) +{ + int i, rc = 0, error = 0; + struct if_dqinfo *info = (struct if_dqinfo *)&oqc->qc_dqinfo; + struct if_dqblk *dqblk = (struct if_dqblk *)&oqc->qc_dqblk; + ENTRY; + + /* XXX: quotaoff */ + return -EOPNOTSUPP; + + for (i = 0; i < MAXQUOTAS; i++) if (Q_TYPESET(oqc, i)) { + switch (oqc->qc_cmd) { + case Q_QUOTAON: { + rc = sb->s_qcop->quota_on(sb, i, oqc->qc_id, + (char *)op_quotafile[i]); + if (rc == -EBUSY) + error = rc; + else if (rc) + GOTO(out, rc); + break; + } + case Q_QUOTAOFF: { + sb->s_qcop->quota_off(sb, i); + break; + } + case Q_GETOINFO: + case Q_GETINFO: + rc = sb->s_qcop->get_info(sb, i, info); + GOTO(out, rc); + case Q_SETQUOTA: + rc = sb->s_qcop->set_dqblk(sb, i, oqc->qc_id, dqblk); + GOTO(out, rc); + case Q_GETOQUOTA: + case Q_GETQUOTA: + rc = sb->s_qcop->get_dqblk(sb, i, oqc->qc_id, dqblk); + GOTO(out, rc); + default: + CERROR("unsupported quotactl command: %d", oqc->qc_cmd); + LBUG(); + } + } +out: + if (!rc && error) + rc = error; + RETURN(rc); +} + +struct chkquot { + struct hlist_node cq_hash; + struct list_head cq_list; + qid_t cq_id; + short cq_type; + __u32 cq_bhardlimit; + __u32 
cq_bsoftlimit; + qsize_t cq_curspace; + __u32 cq_ihardlimit; + __u32 cq_isoftlimit; + __u32 cq_curinodes; + __u64 cq_btime; + __u64 cq_itime; +}; + +static inline unsigned int const +chkquot_hash(qid_t id, int type) +{ + return (id * (MAXQUOTAS - type)) % NR_DQHASH; +} + +static inline struct chkquot * +find_chkquot(struct hlist_head *head, qid_t id, int type) +{ + struct hlist_node *node, *next; + struct chkquot *cq = NULL; + + hlist_for_each_safe(node, next, head) { + cq = hlist_entry(node, struct chkquot, cq_hash); + if (cq->cq_id == id && cq->cq_type == type) + return cq; + } + + return NULL; +} + +static struct chkquot *alloc_chkquot(qid_t id, int type) +{ + struct chkquot *cq; + + OBD_ALLOC(cq, sizeof(*cq)); + if (cq) { +#if 0 /* XXX: 2.4 doesn't support this macro */ + INIT_HLIST_NODE(&cq->cq_hash); +#endif + INIT_LIST_HEAD(&cq->cq_list); + cq->cq_id = id; + cq->cq_type = type; + } + + return cq; +} + +static struct chkquot * +cqget(struct super_block *sb, struct hlist_head *hash, struct list_head *list, + qid_t id, int type, int first_check) +{ + struct hlist_head *head = hash + chkquot_hash(id, type); + struct if_dqblk dqb; + struct chkquot *cq; + int rc; + + cq = find_chkquot(head, id, type); + if (cq) + return cq; + + cq = alloc_chkquot(id, type); + if (!cq) + return NULL; + + if (!first_check) { + rc = sb->s_qcop->get_dqblk(sb, type, id, &dqb); + if (!rc) { + cq->cq_bhardlimit = dqb.dqb_bhardlimit; + cq->cq_bsoftlimit = dqb.dqb_bsoftlimit; + cq->cq_ihardlimit = dqb.dqb_ihardlimit; + cq->cq_isoftlimit = dqb.dqb_isoftlimit; + cq->cq_btime = dqb.dqb_btime; + cq->cq_itime = dqb.dqb_itime; + } + } + + hlist_add_head(&cq->cq_hash, head); + list_add_tail(&cq->cq_list, list); + + return cq; +} + +static inline int quota_onoff(struct super_block *sb, int cmd, int type) +{ + struct obd_quotactl *oqctl; + int rc; + + OBD_ALLOC(oqctl, sizeof(*oqctl)); + + oqctl->qc_cmd = cmd; + oqctl->qc_id = QFMT_LDISKFS; + oqctl->qc_type = type; + rc = fsfilt_ext3_quotactl(sb, 
oqctl); + + OBD_FREE(oqctl, sizeof(*oqctl)); + return rc; +} + +static inline void read_old_dqinfo(struct super_block *sb, int type, + struct if_dqinfo *dqinfo) +{ + struct obd_quotactl *oqctl; + int rc; + ENTRY; + + OBD_ALLOC(oqctl, sizeof(*oqctl)); + + oqctl->qc_cmd = Q_GETINFO; + oqctl->qc_type = type; + rc = fsfilt_ext3_quotactl(sb, oqctl); + if (!rc) + memcpy(dqinfo + type, &oqctl->qc_dqinfo, sizeof(*dqinfo)); + + OBD_FREE(oqctl, sizeof(*oqctl)); + EXIT; +} + +static inline struct ext3_group_desc * +get_group_desc(struct super_block *sb, int group) +{ + unsigned long desc_block, desc; + struct ext3_group_desc *gdp; + + desc_block = group / EXT3_DESC_PER_BLOCK(sb); + desc = group % EXT3_DESC_PER_BLOCK(sb); + gdp = (struct ext3_group_desc *) + EXT3_SB(sb)->s_group_desc[desc_block]->b_data; + + return gdp + desc; +} + +static inline struct buffer_head * +read_inode_bitmap(struct super_block *sb, unsigned long group) +{ + struct ext3_group_desc *desc; + struct buffer_head *bh; + + desc = get_group_desc(sb, group); + bh = sb_bread(sb, le32_to_cpu(desc->bg_inode_bitmap)); + + return bh; +} + +static inline struct inode *ext3_iget_inuse(struct super_block *sb, + struct buffer_head *bitmap_bh, + int index, unsigned long ino) +{ + struct inode *inode = NULL; + + if (ext3_test_bit(index, bitmap_bh->b_data)) + inode = iget(sb, ino); + + return inode; +} + +struct qchk_ctxt { + struct hlist_head hash[NR_DQHASH]; + struct list_head list; + int first_check[MAXQUOTAS]; + struct if_dqinfo dqinfo[MAXQUOTAS]; +}; + +static int add_inode_quota(struct inode *inode, struct qchk_ctxt *qctxt, + struct obd_quotactl *oqc) +{ + struct chkquot *cq; + loff_t size = 0; + qid_t qid[MAXQUOTAS]; + int cnt; + + if (!inode) + return 0; + + qid[USRQUOTA] = inode->i_uid; + qid[GRPQUOTA] = inode->i_gid; + + if (S_ISDIR(inode->i_mode) || + S_ISREG(inode->i_mode) || + S_ISLNK(inode->i_mode)) + size = inode_get_bytes(inode); + + for (cnt = 0; cnt < MAXQUOTAS; cnt++) if (Q_TYPESET(oqc, cnt)) { + cq = 
cqget(inode->i_sb, qctxt->hash, &qctxt->list, qid[cnt], + cnt, qctxt->first_check[cnt]); + if (!cq) + return -ENOMEM; + + cq->cq_curspace += size; + cq->cq_curinodes ++; + } + + return 0; +} + +static int v2_write_dqheader(struct file *f, int type) +{ + int quota_magics[] = V2_INITQMAGICS; + int quota_versions[] = V2_INITQVERSIONS; + struct v2_disk_dqheader dqhead; + ssize_t size; + loff_t offset = 0; + mm_segment_t fs; + + dqhead.dqh_magic = cpu_to_le32(quota_magics[type]); + dqhead.dqh_version = cpu_to_le32(quota_versions[type]); + + fs = get_fs(); + set_fs(KERNEL_DS); + size = f->f_op->write(f, (char *)&dqhead, sizeof(dqhead), &offset); + set_fs(fs); + if (size != sizeof(dqhead)) { + CERROR("error writing dqhead in quota file"); + return -1; + } + + return 0; +} + +/* write dqinfo struct in a new quota file */ +static int v2_write_dqinfo(struct file *f, int type, struct if_dqinfo *info) +{ + struct v2_disk_dqinfo dqinfo; + int blocks = V2_DQTREEOFF + 1; + ssize_t size; + loff_t offset = V2_DQINFOOFF; + mm_segment_t fs; + + if (info) { + dqinfo.dqi_bgrace = cpu_to_le32(info->dqi_bgrace); + dqinfo.dqi_igrace = cpu_to_le32(info->dqi_igrace); + dqinfo.dqi_flags = cpu_to_le32(info->dqi_flags & DQF_MASK & + ~DQF_INFO_DIRTY); + } else { + dqinfo.dqi_bgrace = cpu_to_le32(MAX_DQ_TIME); + dqinfo.dqi_igrace = cpu_to_le32(MAX_IQ_TIME); + dqinfo.dqi_flags = 0; + } + + dqinfo.dqi_blocks = cpu_to_le32(blocks); + dqinfo.dqi_free_blk = 0; + dqinfo.dqi_free_entry = 0; + + fs = get_fs(); + set_fs(KERNEL_DS); + size = f->f_op->write(f, (char *)&dqinfo, sizeof(dqinfo), &offset); + set_fs(fs); + + if (size != sizeof(dqinfo)) { + CERROR("error writing dqinfo in quota file"); + return -1; + } + + return 0; +} + +static int create_new_quota_files(struct qchk_ctxt *qctxt, + struct obd_quotactl *oqc) +{ + int i, rc = 0; + ENTRY; + + for (i = 0; i < MAXQUOTAS; i++) if (Q_TYPESET(oqc, i)) { + struct if_dqinfo *info = qctxt->first_check[i]? 
NULL: + &qctxt->dqinfo[i]; + struct file *file; + + file = filp_open(op_quotafile[i], + O_RDWR | O_CREAT | O_TRUNC, 0644); + if (IS_ERR(file)) { + rc = PTR_ERR(file); + CERROR("can't create %s file: rc = %d\n", + op_quotafile[i], rc); + GOTO(out, rc); + } + + rc = v2_write_dqheader(file, i); + if (rc) { + filp_close(file, 0); + GOTO(out, rc = -EIO); + } + + rc = v2_write_dqinfo(file, i, info); + if (rc) { + filp_close(file, 0); + GOTO(out, rc = -EIO); + } + + filp_close(file, 0); + } + +out: + RETURN(rc); +} + + +static int commit_chkquot(struct super_block *sb, struct qchk_ctxt *qctxt, + struct chkquot *cq) +{ + struct obd_quotactl oqc = { 0, }; + struct timeval now; + + do_gettimeofday(&now); + + if (cq->cq_bsoftlimit && + toqb(cq->cq_curspace) >= cq->cq_bsoftlimit && + !cq->cq_btime) + cq->cq_btime = now.tv_sec + + qctxt->dqinfo[cq->cq_type].dqi_bgrace; + + if (cq->cq_isoftlimit && + cq->cq_curinodes >= cq->cq_isoftlimit && + !cq->cq_itime) + cq->cq_itime = now.tv_sec + + qctxt->dqinfo[cq->cq_type].dqi_igrace; + + oqc.qc_cmd = Q_SETQUOTA; + oqc.qc_type = cq->cq_type; + oqc.qc_id = cq->cq_id; + oqc.qc_dqblk.dqb_bhardlimit = cq->cq_bhardlimit; + oqc.qc_dqblk.dqb_bsoftlimit = cq->cq_bsoftlimit; + oqc.qc_dqblk.dqb_curspace = cq->cq_curspace; + oqc.qc_dqblk.dqb_ihardlimit = cq->cq_ihardlimit; + oqc.qc_dqblk.dqb_isoftlimit = cq->cq_isoftlimit; + oqc.qc_dqblk.dqb_curinodes = cq->cq_curinodes; + oqc.qc_dqblk.dqb_btime = cq->cq_btime; + oqc.qc_dqblk.dqb_itime = cq->cq_itime; + oqc.qc_dqblk.dqb_valid = QIF_ALL; + + return fsfilt_ext3_quotactl(sb, &oqc); +} + +static int prune_chkquots(struct super_block *sb, + struct qchk_ctxt *qctxt, int error) +{ + struct chkquot *cq, *tmp; + int rc; + + list_for_each_entry_safe(cq, tmp, &qctxt->list, cq_list) { + if (!error) { + rc = commit_chkquot(sb, qctxt, cq); + if (rc) + error = rc; + } + hlist_del_init(&cq->cq_hash); + list_del(&cq->cq_list); + OBD_FREE(cq, sizeof(*cq)); + } + + return error; +} + +static int 
fsfilt_ext3_quotacheck(struct super_block *sb, + struct obd_quotactl *oqc) +{ + struct ext3_sb_info *sbi = EXT3_SB(sb); + int i, group; + struct qchk_ctxt *qctxt; + struct buffer_head *bitmap_bh = NULL; + unsigned long ino; + struct inode *inode; + int rc; + ENTRY; + + /* turn on quota and read dqinfo if existed */ + OBD_ALLOC(qctxt, sizeof(*qctxt)); + if (!qctxt) + RETURN(-ENOMEM); + + for (i = 0; i < NR_DQHASH; i++) + INIT_HLIST_HEAD(&qctxt->hash[i]); + INIT_LIST_HEAD(&qctxt->list); + + for (i = 0; i < MAXQUOTAS; i++) if (Q_TYPESET(oqc, i)) { + rc = quota_onoff(sb, Q_QUOTAON, i); + if (!rc || rc == -EBUSY) + read_old_dqinfo(sb, i, qctxt->dqinfo); + else if (rc == -ENOENT) + qctxt->first_check[i] = 1; + else if (rc) + GOTO(out, rc); + } + + /* check quota and update in hash */ + for (group = 0; group < sbi->s_groups_count; group++) { + ino = group * sbi->s_inodes_per_group + 1; + brelse(bitmap_bh); + bitmap_bh = read_inode_bitmap(sb, group); + + for (i = 0; i < sbi->s_inodes_per_group; i++, ino++) { + if (ino < sbi->s_first_ino) + continue; + + inode = ext3_iget_inuse(sb, bitmap_bh, i, ino); + rc = add_inode_quota(inode, qctxt, oqc); + iput(inode); + if (rc) { + brelse(bitmap_bh); + GOTO(out, rc); + } + } + } + brelse(bitmap_bh); + + /* turn off quota cause we are to dump chkquot to files */ + quota_onoff(sb, Q_QUOTAOFF, oqc->qc_type); + + rc = create_new_quota_files(qctxt, oqc); + if (rc) + GOTO(out, rc); + + /* we use vfs functions to set dqblk, so turn quota on */ + rc = quota_onoff(sb, Q_QUOTAON, oqc->qc_type); + if (rc) + GOTO(out, rc); + +out: + /* dump and free chkquot */ + rc = prune_chkquots(sb, qctxt, rc); + OBD_FREE(qctxt, sizeof(*qctxt)); + + /* turn off quota, `lfs quotacheck` will turn on when all + * nodes quotacheck finish. 
*/ + quota_onoff(sb, Q_QUOTAOFF, oqc->qc_type); + + if (rc) + CERROR("quotacheck failed: rc = %d\n", rc); + + oqc->qc_stat = rc; + RETURN(rc); +} + +static int fsfilt_ext3_quotainfo(struct lustre_quota_info *lqi, int type, int cmd) +{ + int rc = 0; + ENTRY; + + switch (cmd) { + case QFILE_CHK: + rc = lustre_check_quota_file(lqi, type); + break; + case QFILE_RD_INFO: + rc = lustre_read_quota_info(lqi, type); + break; + case QFILE_WR_INFO: + rc = lustre_write_quota_info(lqi, type); + break; + case QFILE_INIT_INFO: + rc = lustre_init_quota_info(lqi, type); + break; + default: + CERROR("Unsupported admin quota file cmd %d\n", cmd); + LBUG(); + break; + } + RETURN(rc); +} + +static int fsfilt_ext3_dquot(struct lustre_dquot *dquot, int cmd) +{ + int rc = 0; + ENTRY; + + switch (cmd) { + case QFILE_RD_DQUOT: + rc = lustre_read_dquot(dquot); + break; + case QFILE_WR_DQUOT: + if (dquot->dq_dqb.dqb_ihardlimit || + dquot->dq_dqb.dqb_isoftlimit || + dquot->dq_dqb.dqb_bhardlimit || + dquot->dq_dqb.dqb_bsoftlimit) + clear_bit(DQ_FAKE_B, &dquot->dq_flags); + else + set_bit(DQ_FAKE_B, &dquot->dq_flags); + + rc = lustre_commit_dquot(dquot); + if (rc >= 0) + rc = 0; + break; + default: + CERROR("Unsupported admin quota file cmd %d\n", cmd); + LBUG(); + break; + } + RETURN(rc); +} + static struct fsfilt_operations fsfilt_ext3_ops = { .fs_type = "ext3", .fs_owner = THIS_MODULE, @@ -1194,6 +1743,10 @@ static struct fsfilt_operations fsfilt_ext3_ops = { .fs_setup = fsfilt_ext3_setup, .fs_send_bio = fsfilt_ext3_send_bio, .fs_get_op_len = fsfilt_ext3_get_op_len, + .fs_quotactl = fsfilt_ext3_quotactl, + .fs_quotacheck = fsfilt_ext3_quotacheck, + .fs_quotainfo = fsfilt_ext3_quotainfo, + .fs_dquot = fsfilt_ext3_dquot, }; static int __init fsfilt_ext3_init(void) diff --git a/lustre/lvfs/lvfs_linux.c b/lustre/lvfs/lvfs_linux.c index b463b88..054e6ec 100644 --- a/lustre/lvfs/lvfs_linux.c +++ b/lustre/lvfs/lvfs_linux.c @@ -48,11 +48,11 @@ #include #include +#include atomic_t obd_memory; int 
obd_memmax; - /* Debugging check only needed during development */ #ifdef OBD_CTXT_DEBUG # define ASSERT_CTXT_MAGIC(magic) LASSERT((magic) == OBD_RUN_CTXT_MAGIC) diff --git a/lustre/lvfs/quotacheck_test.c b/lustre/lvfs/quotacheck_test.c new file mode 100644 index 0000000..0791981 --- /dev/null +++ b/lustre/lvfs/quotacheck_test.c @@ -0,0 +1,230 @@ +/* -*- mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*- + * vim:expandtab:shiftwidth=8:tabstop=8: + * + * Copyright (C) 2003 Cluster File Systems, Inc. + * Author: Lai Siyao + * + * This file is part of Lustre, http://www.lustre.org/ + * + * Lustre is free software; you can redistribute it and/or + * modify it under the terms of version 2 of the GNU General Public + * License as published by the Free Software Foundation. + * + * Lustre is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with Lustre; if not, write to the Free Software + * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. + * + * A kernel module which tests the fsfilt quotacheck API from the OBD setup function. 
+ */ + +#ifndef EXPORT_SYMTAB +# define EXPORT_SYMTAB +#endif +#define DEBUG_SUBSYSTEM S_CLASS + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include +#include +#include +#include + +char *test_quotafile[] = {"aquotacheck.user", "aquotacheck.group"}; + +static inline struct ext3_group_desc * +get_group_desc(struct super_block *sb, int group) +{ + unsigned long desc_block, desc; + struct ext3_group_desc *gdp; + + desc_block = group / EXT3_DESC_PER_BLOCK(sb); + desc = group % EXT3_DESC_PER_BLOCK(sb); + gdp = (struct ext3_group_desc *) + EXT3_SB(sb)->s_group_desc[desc_block]->b_data; + + return gdp + desc; +} + +static inline struct buffer_head * +read_inode_bitmap(struct super_block *sb, unsigned long group) +{ + struct ext3_group_desc *desc; + struct buffer_head *bh; + + desc = get_group_desc(sb, group); + bh = sb_bread(sb, le32_to_cpu(desc->bg_inode_bitmap)); + + return bh; +} + +static inline struct inode *ext3_iget_inuse(struct super_block *sb, + struct buffer_head *bitmap_bh, + int index, unsigned long ino) +{ + struct inode *inode = NULL; + + if (ext3_test_bit(index, bitmap_bh->b_data)) { + CERROR("i: %d, ino: %lu\n", index, ino); + ll_sleep(1); + inode = iget(sb, ino); + } + + return inode; +} + +static void print_inode(struct inode *inode) +{ + loff_t size = 0; + + if (S_ISDIR(inode->i_mode) || + S_ISREG(inode->i_mode) || + S_ISLNK(inode->i_mode)) + size = inode_get_bytes(inode); + + CERROR("%lu: uid: %u, size: %llu, blocks: %lu, real size: %llu\n", + inode->i_ino, inode->i_uid, inode->i_size, inode->i_blocks, size); +} + +/* Test quotaon */ +static int quotacheck_test_1(struct obd_device *obd, struct super_block *sb) +{ + struct ext3_sb_info *sbi = EXT3_SB(sb); + struct buffer_head *bitmap_bh = NULL; + struct inode *inode; + unsigned long ino; + int i, group; + ENTRY; + + for (group = 0; group < sbi->s_groups_count; group++) { + ino = group * sbi->s_inodes_per_group + 1; + 
brelse(bitmap_bh); + bitmap_bh = read_inode_bitmap(sb, group); + + if (group == 0) + CERROR("groups_count: %lu, inodes_per_group: %lu, first_ino: %u, inodes_count: %u\n", + sbi->s_groups_count, sbi->s_inodes_per_group, + sbi->s_first_ino, le32_to_cpu(sbi->s_es->s_inodes_count)); + + for (i = 0; i < sbi->s_inodes_per_group; i++, ino++) { + if (ino < sbi->s_first_ino) + continue; + if (ino > le32_to_cpu(sbi->s_es->s_inodes_count)) { + CERROR("bad inode number: %lu > s_inodes_count\n", ino); + brelse(bitmap_bh); + RETURN(-E2BIG); + } + inode = ext3_iget_inuse(sb, bitmap_bh, i, ino); + if (inode) + print_inode(inode); + iput(inode); + } + } + brelse(bitmap_bh); + + RETURN(0); +} + +/* ------------------------------------------------------------------------- + * Tests above, boring obd functions below + * ------------------------------------------------------------------------- */ +static int quotacheck_run_tests(struct obd_device *obd, struct obd_device *tgt) +{ + struct super_block *sb; + int rc; + ENTRY; + + if (!strcmp(tgt->obd_type->typ_name, LUSTRE_MDS_NAME)) + sb = tgt->u.mds.mds_sb; + else if (!strcmp(tgt->obd_type->typ_name, "obdfilter")) + sb = tgt->u.filter.fo_sb; + else { + CERROR("TARGET OBD should be mds or ost\n"); + RETURN(-EINVAL); + } + + rc = quotacheck_test_1(tgt, sb); + + return rc; +} + +static int quotacheck_test_cleanup(struct obd_device *obd) +{ + lprocfs_obd_cleanup(obd); + return 0; +} + +static int quotacheck_test_setup(struct obd_device *obd, obd_count len, void *buf) +{ + struct lprocfs_static_vars lvars; + struct lustre_cfg *lcfg = buf; + struct obd_device *tgt; + int rc; + ENTRY; + + if (lcfg->lcfg_bufcount < 2) { /* buffer 1 (the target name read below) must exist */ + CERROR("requires a mds OBD name\n"); + RETURN(-EINVAL); + } + + tgt = class_name2obd(lustre_cfg_string(lcfg, 1)); + if (!tgt || !tgt->obd_attached || !tgt->obd_set_up) { + CERROR("target device not attached or not set up (%s)\n", + lustre_cfg_string(lcfg, 1)); + RETURN(-EINVAL); + } + + /* register lprocfs before the tests so the failure-path cleanup undoes it */ + lprocfs_init_vars(quotacheck_test, &lvars); + lprocfs_obd_setup(obd, lvars.obd_vars); + 
rc = quotacheck_run_tests(obd, tgt); + if (rc) + quotacheck_test_cleanup(obd); + + RETURN(rc); +} + +static struct obd_ops quotacheck_obd_ops = { + .o_owner = THIS_MODULE, + .o_setup = quotacheck_test_setup, + .o_cleanup = quotacheck_test_cleanup, +}; + +static struct lprocfs_vars lprocfs_obd_vars[] = { {0} }; +static struct lprocfs_vars lprocfs_module_vars[] = { {0} }; +LPROCFS_INIT_VARS(quotacheck_test, lprocfs_module_vars, lprocfs_obd_vars) + +static int __init quotacheck_test_init(void) +{ + struct lprocfs_static_vars lvars; + + lprocfs_init_vars(quotacheck_test, &lvars); + return class_register_type(&quotacheck_obd_ops, lvars.module_vars, + "quotacheck_test"); +} + +static void __exit quotacheck_test_exit(void) +{ + class_unregister_type("quotacheck_test"); +} + +MODULE_AUTHOR("Cluster File Systems, Inc. "); +MODULE_DESCRIPTION("quotacheck test module"); +MODULE_LICENSE("GPL"); + +module_init(quotacheck_test_init); +module_exit(quotacheck_test_exit); diff --git a/lustre/lvfs/quotactl_test.c b/lustre/lvfs/quotactl_test.c new file mode 100644 index 0000000..c05efde --- /dev/null +++ b/lustre/lvfs/quotactl_test.c @@ -0,0 +1,374 @@ +/* -*- mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*- + * vim:expandtab:shiftwidth=8:tabstop=8: + * + * Copyright (C) 2003 Cluster File Systems, Inc. + * Author: Lai Siyao + * + * This file is part of Lustre, http://www.lustre.org/ + * + * Lustre is free software; you can redistribute it and/or + * modify it under the terms of version 2 of the GNU General Public + * License as published by the Free Software Foundation. + * + * Lustre is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. 
+ * + * You should have received a copy of the GNU General Public License + * along with Lustre; if not, write to the Free Software + * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. + * + * A kernel module which tests the fsfilt quotactl API from the OBD setup function. + */ + +#ifndef EXPORT_SYMTAB +# define EXPORT_SYMTAB +#endif +#define DEBUG_SUBSYSTEM S_CLASS + +#include +#include + +#include +#include +#include +#include + +char *test_quotafile[] = {"aquotactl.user", "aquotactl.group"}; + +/* Test quotaon */ +static int quotactl_test_1(struct obd_device *obd, struct super_block *sb) +{ + struct obd_quotactl oqctl; + int rc; + ENTRY; + + oqctl.qc_cmd = Q_QUOTAON; + oqctl.qc_id = QFMT_LDISKFS; + oqctl.qc_type = UGQUOTA; + rc = fsfilt_quotactl(obd, sb, &oqctl); + if (rc) { + CERROR("1a: quotactl Q_QUOTAON failed: %d\n", rc); + RETURN(rc); + } + + RETURN(0); +} + +#if 0 /* set/getinfo not supported, this is for cluster-wide quotas */ +/* Test set/getinfo */ +static int quotactl_test_2(struct obd_device *obd, struct super_block *sb) +{ + struct obd_quotactl oqctl; + int rc; + ENTRY; + + oqctl.qc_cmd = Q_SETINFO; + oqctl.qc_type = USRQUOTA; + oqctl.qc_dqinfo.dqi_bgrace = 1616; + oqctl.qc_dqinfo.dqi_igrace = 2828; + oqctl.qc_dqinfo.dqi_flags = 0; + rc = fsfilt_quotactl(obd, sb, &oqctl); + if (rc) { + CERROR("2a: quotactl Q_SETINFO failed: %d\n", rc); + RETURN(rc); + } + + oqctl.qc_cmd = Q_GETINFO; + oqctl.qc_type = USRQUOTA; + rc = fsfilt_quotactl(obd, sb, &oqctl); + if (rc) { + CERROR("2b: quotactl Q_GETINFO failed: %d\n", rc); + RETURN(rc); + } + if (oqctl.qc_dqinfo.dqi_bgrace != 1616 || + oqctl.qc_dqinfo.dqi_igrace != 2828 || + oqctl.qc_dqinfo.dqi_flags != 0) { + CERROR("2c: quotactl Q_GETINFO get wrong result: %d, %d, %d\n", + oqctl.qc_dqinfo.dqi_bgrace, + oqctl.qc_dqinfo.dqi_igrace, + oqctl.qc_dqinfo.dqi_flags); + RETURN(-EINVAL); + } + + RETURN(0); +} +#endif + +/* Test set/getquota */ +static int quotactl_test_3(struct obd_device *obd, struct 
super_block *sb) +{ + struct obd_quotactl oqctl; + int rc; + ENTRY; + + oqctl.qc_cmd = Q_SETQUOTA; + oqctl.qc_type = USRQUOTA; + oqctl.qc_id = 500; + oqctl.qc_dqblk.dqb_bhardlimit = 919; + oqctl.qc_dqblk.dqb_bsoftlimit = 818; + oqctl.qc_dqblk.dqb_ihardlimit = 616; + oqctl.qc_dqblk.dqb_isoftlimit = 515; + oqctl.qc_dqblk.dqb_valid = QIF_LIMITS; + rc = fsfilt_quotactl(obd, sb, &oqctl); + if (rc) { + CERROR("3a: quotactl Q_SETQUOTA failed: %d\n", rc); + RETURN(rc); + } + + oqctl.qc_cmd = Q_GETQUOTA; /* read the limits back */ + oqctl.qc_type = USRQUOTA; + oqctl.qc_id = 500; + rc = fsfilt_quotactl(obd, sb, &oqctl); + if (rc) { + CERROR("3b: quotactl Q_GETQUOTA failed: %d\n", rc); + RETURN(rc); + } + if (oqctl.qc_dqblk.dqb_bhardlimit != 919 || + oqctl.qc_dqblk.dqb_bsoftlimit != 818 || + oqctl.qc_dqblk.dqb_ihardlimit != 616 || + oqctl.qc_dqblk.dqb_isoftlimit != 515) { + CERROR("3c: quotactl Q_GETQUOTA get wrong result:" + "%llu, %llu, %llu, %llu\n", + oqctl.qc_dqblk.dqb_bhardlimit, + oqctl.qc_dqblk.dqb_bsoftlimit, + oqctl.qc_dqblk.dqb_ihardlimit, + oqctl.qc_dqblk.dqb_isoftlimit); + RETURN(-EINVAL); + } + + oqctl.qc_cmd = Q_SETQUOTA; + oqctl.qc_type = USRQUOTA; + oqctl.qc_id = 500; + oqctl.qc_dqblk.dqb_curspace = 717; + oqctl.qc_dqblk.dqb_curinodes = 414; + oqctl.qc_dqblk.dqb_valid = QIF_USAGE; + rc = fsfilt_quotactl(obd, sb, &oqctl); + if (rc) { + CERROR("3d: quotactl Q_SETQUOTA failed: %d\n", rc); + RETURN(rc); + } + + oqctl.qc_cmd = Q_GETQUOTA; /* read the usage back */ + oqctl.qc_type = USRQUOTA; + oqctl.qc_id = 500; + rc = fsfilt_quotactl(obd, sb, &oqctl); + if (rc) { + CERROR("3e: quotactl Q_GETQUOTA failed: %d\n", rc); + RETURN(rc); + } + if (oqctl.qc_dqblk.dqb_curspace != 717 || + oqctl.qc_dqblk.dqb_curinodes != 414) { + CERROR("3f: quotactl Q_GETQUOTA get wrong result: %llu, %llu\n", + oqctl.qc_dqblk.dqb_curspace, + oqctl.qc_dqblk.dqb_curinodes); + RETURN(-EINVAL); + } + + oqctl.qc_cmd = Q_SETQUOTA; + oqctl.qc_type = USRQUOTA; + oqctl.qc_dqblk.dqb_btime = 313; + oqctl.qc_dqblk.dqb_itime = 212; + oqctl.qc_id = 500; + 
oqctl.qc_dqblk.dqb_valid = QIF_TIMES; + rc = fsfilt_quotactl(obd, sb, &oqctl); + if (rc) { + CERROR("3g: quotactl Q_SETQUOTA failed: %d\n", rc); + RETURN(rc); + } + + oqctl.qc_cmd = Q_GETQUOTA; /* read the grace times back */ + oqctl.qc_type = USRQUOTA; + oqctl.qc_id = 500; + rc = fsfilt_quotactl(obd, sb, &oqctl); + if (rc) { + CERROR("3h: quotactl Q_GETQUOTA failed: %d\n", rc); + RETURN(rc); + } + if (oqctl.qc_dqblk.dqb_btime != 313 || + oqctl.qc_dqblk.dqb_itime != 212) { + CERROR("3i: quotactl Q_GETQUOTA get wrong result: %llu, %llu\n", + oqctl.qc_dqblk.dqb_btime, + oqctl.qc_dqblk.dqb_itime); + RETURN(-EINVAL); + } + + oqctl.qc_cmd = Q_SETQUOTA; + oqctl.qc_type = USRQUOTA; + oqctl.qc_id = 500; + oqctl.qc_dqblk.dqb_bhardlimit = 919; + oqctl.qc_dqblk.dqb_bsoftlimit = 818; + oqctl.qc_dqblk.dqb_curspace = 717; + oqctl.qc_dqblk.dqb_ihardlimit = 616; + oqctl.qc_dqblk.dqb_isoftlimit = 515; + oqctl.qc_dqblk.dqb_curinodes = 414; + oqctl.qc_dqblk.dqb_btime = 313; + oqctl.qc_dqblk.dqb_itime = 212; + oqctl.qc_dqblk.dqb_valid = QIF_ALL; + rc = fsfilt_quotactl(obd, sb, &oqctl); + if (rc) { + CERROR("3j: quotactl Q_SETQUOTA failed: %d\n", rc); + RETURN(rc); + } + + oqctl.qc_cmd = Q_GETQUOTA; /* read everything back */ + oqctl.qc_type = USRQUOTA; + oqctl.qc_id = 500; + rc = fsfilt_quotactl(obd, sb, &oqctl); + if (rc) { + CERROR("3k: quotactl Q_GETQUOTA failed: %d\n", rc); + RETURN(rc); + } + if (oqctl.qc_dqblk.dqb_bhardlimit != 919 || + oqctl.qc_dqblk.dqb_bsoftlimit != 818 || + oqctl.qc_dqblk.dqb_ihardlimit != 616 || + oqctl.qc_dqblk.dqb_isoftlimit != 515 || + oqctl.qc_dqblk.dqb_curspace != 717 || + oqctl.qc_dqblk.dqb_curinodes != 414 || + oqctl.qc_dqblk.dqb_btime != 0 || + oqctl.qc_dqblk.dqb_itime != 0) { + CERROR("3l: quotactl Q_GETQUOTA get wrong result:" + "%llu, %llu, %llu, %llu, %llu, %llu, %llu, %llu\n", + oqctl.qc_dqblk.dqb_bhardlimit, + oqctl.qc_dqblk.dqb_bsoftlimit, + oqctl.qc_dqblk.dqb_ihardlimit, + oqctl.qc_dqblk.dqb_isoftlimit, + oqctl.qc_dqblk.dqb_curspace, + oqctl.qc_dqblk.dqb_curinodes, + oqctl.qc_dqblk.dqb_btime, + 
oqctl.qc_dqblk.dqb_itime); + RETURN(-EINVAL); + } + + RETURN(0); +} + +/* Test quotaoff */ +static int quotactl_test_4(struct obd_device *obd, struct super_block *sb) +{ + struct obd_quotactl oqctl; + int rc; + ENTRY; + + oqctl.qc_cmd = Q_QUOTAOFF; + oqctl.qc_id = 500; + oqctl.qc_type = UGQUOTA; + rc = fsfilt_quotactl(obd, sb, &oqctl); + if (rc) { + CERROR("4a: quotactl Q_QUOTAOFF failed: %d\n", rc); + RETURN(rc); + } + + RETURN(0); +} + +/* ------------------------------------------------------------------------- + * Tests above, boring obd functions below + * ------------------------------------------------------------------------- */ +static int quotactl_run_tests(struct obd_device *obd, struct obd_device *tgt) +{ + struct super_block *sb; + struct obd_run_ctxt saved; + int rc; + ENTRY; + + if (!strcmp(tgt->obd_type->typ_name, LUSTRE_MDS_NAME)) + sb = tgt->u.mds.mds_sb; + else if (!strcmp(tgt->obd_type->typ_name, "obdfilter")) + sb = tgt->u.filter.fo_sb; + else { + CERROR("TARGET OBD should be mds or obdfilter\n"); + RETURN(-EINVAL); + } + + push_ctxt(&saved, &tgt->obd_ctxt, NULL); + + rc = quotactl_test_1(tgt, sb); + if (rc) + GOTO(cleanup, rc); + +#if 0 + rc = quotactl_test_2(tgt, sb); + if (rc) + GOTO(cleanup, rc); +#endif + + rc = quotactl_test_3(tgt, sb); + if (rc) + GOTO(cleanup, rc); + + cleanup: + quotactl_test_4(tgt, sb); + + pop_ctxt(&saved, &tgt->obd_ctxt, NULL); + + return rc; +} + +static int quotactl_test_cleanup(struct obd_device *obd) +{ + lprocfs_obd_cleanup(obd); + return 0; +} + +static int quotactl_test_setup(struct obd_device *obd, obd_count len, void *buf) +{ + struct lprocfs_static_vars lvars; + struct lustre_cfg *lcfg = buf; + struct obd_device *tgt; + int rc; + ENTRY; + + if (lcfg->lcfg_bufcount < 2) { /* buffer 1 (the target name read below) must exist */ + CERROR("requires a mds OBD name\n"); + RETURN(-EINVAL); + } + + tgt = class_name2obd(lustre_cfg_string(lcfg, 1)); + if (!tgt || !tgt->obd_attached || !tgt->obd_set_up) { + CERROR("target device not attached or not set up (%s)\n", + 
lustre_cfg_string(lcfg, 1)); + RETURN(-EINVAL); + } + + lprocfs_init_vars(quotactl_test, &lvars); + lprocfs_obd_setup(obd, lvars.obd_vars); + + rc = quotactl_run_tests(obd, tgt); + + quotactl_test_cleanup(obd); + + RETURN(rc); +} + +static struct obd_ops quotactl_obd_ops = { + .o_owner = THIS_MODULE, + .o_setup = quotactl_test_setup, + .o_cleanup = quotactl_test_cleanup, +}; + +static struct lprocfs_vars lprocfs_obd_vars[] = { {0} }; +static struct lprocfs_vars lprocfs_module_vars[] = { {0} }; +LPROCFS_INIT_VARS(quotactl_test, lprocfs_module_vars, lprocfs_obd_vars) + +static int __init quotactl_test_init(void) +{ + struct lprocfs_static_vars lvars; + + lprocfs_init_vars(quotactl_test, &lvars); + return class_register_type(&quotactl_obd_ops, lvars.module_vars, + "quotactl_test"); +} + +static void __exit quotactl_test_exit(void) +{ + class_unregister_type("quotactl_test"); +} + +MODULE_AUTHOR("Cluster File Systems, Inc. "); +MODULE_DESCRIPTION("quotactl test module"); +MODULE_LICENSE("GPL"); + +module_init(quotactl_test_init); +module_exit(quotactl_test_exit); diff --git a/lustre/mdc/mdc_locks.c b/lustre/mdc/mdc_locks.c index d6e7b50..4702f5d 100644 --- a/lustre/mdc/mdc_locks.c +++ b/lustre/mdc/mdc_locks.c @@ -221,7 +221,7 @@ static void mdc_realloc_openmsg(struct ptlrpc_request *req, OBD_ALLOC(new_msg, new_size); if (new_msg != NULL) { struct lustre_msg *old_msg = req->rq_reqmsg; - long irqflags; + unsigned long irqflags; DEBUG_REQ(D_INFO, req, "replace reqmsg for larger EA %u\n", body->eadatasize); diff --git a/lustre/mdc/mdc_request.c b/lustre/mdc/mdc_request.c index 4c21f5a..7ba104f 100644 --- a/lustre/mdc/mdc_request.c +++ b/lustre/mdc/mdc_request.c @@ -611,6 +611,97 @@ int mdc_readpage(struct obd_export *exp, struct ll_fid *mdc_fid, __u64 offset, return rc; } +static int mdc_quotacheck(struct obd_export *exp, struct obd_quotactl *oqctl) +{ + struct client_obd *cli = &exp->exp_obd->u.cli; + struct ptlrpc_request *req; + struct obd_quotactl *body; + int size = 
sizeof(*body); + int rc; + ENTRY; + + req = ptlrpc_prep_req(class_exp2cliimp(exp), MDS_QUOTACHECK, 1, &size, + NULL); + if (!req) + GOTO(out, rc = -ENOMEM); + + body = lustre_msg_buf(req->rq_reqmsg, 0, sizeof(*body)); + memcpy(body, oqctl, sizeof(*body)); + + req->rq_replen = lustre_msg_size(0, NULL); + + spin_lock(&cli->cl_qchk_lock); + cli->cl_qchk_stat = CL_QUOTACHECKING; + spin_unlock(&cli->cl_qchk_lock); + rc = ptlrpc_queue_wait(req); + if (rc) { + spin_lock(&cli->cl_qchk_lock); + cli->cl_qchk_stat = rc; + spin_unlock(&cli->cl_qchk_lock); + } +out: + ptlrpc_req_finished(req); + RETURN (rc); +} + +static int mdc_poll_quotacheck(struct obd_export *exp, + struct if_quotacheck *qchk) +{ + struct client_obd *cli = &exp->exp_obd->u.cli; + int stat; + ENTRY; + + spin_lock(&cli->cl_qchk_lock); + stat = cli->cl_qchk_stat; + spin_unlock(&cli->cl_qchk_lock); + + qchk->stat = stat; + if (stat == CL_QUOTACHECKING) { + qchk->stat = -ENODATA; + stat = 0; + } else if (stat) { + if (qchk->stat > CL_QUOTACHECKING) + qchk->stat = stat = -EINTR; + + strncpy(qchk->obd_type, LUSTRE_MDS_NAME, 10); + qchk->obd_uuid = cli->cl_import->imp_target_uuid; + } + RETURN(stat); +} + +static int mdc_quotactl(struct obd_export *exp, struct obd_quotactl *oqctl) +{ + struct ptlrpc_request *req; + struct obd_quotactl *oqc; + int size = sizeof(*oqctl); + int rc; + ENTRY; + + req = ptlrpc_prep_req(class_exp2cliimp(exp), MDS_QUOTACTL, 1, &size, + NULL); + if (!req) + GOTO(out, rc = -ENOMEM); + + memcpy(lustre_msg_buf(req->rq_reqmsg, 0, sizeof (*oqctl)), oqctl, size); + + req->rq_replen = lustre_msg_size(1, &size); + + rc = ptlrpc_queue_wait(req); + if (!rc) { + oqc = lustre_swab_repbuf(req, 0, sizeof (*oqc), + lustre_swab_obd_quotactl); + if (oqc == NULL) { + CERROR ("Can't unpack mds_body\n"); + GOTO(out, rc = -EPROTO); + } + + memcpy(oqctl, oqc, sizeof(*oqctl)); + } +out: + ptlrpc_req_finished(req); + RETURN (rc); +} + static int mdc_iocontrol(unsigned int cmd, struct obd_export *exp, int len, void 
*karg, void *uarg) { @@ -652,6 +743,9 @@ static int mdc_iocontrol(unsigned int cmd, struct obd_export *exp, int len, GOTO(out, rc); } #endif + case OBD_IOC_POLL_QUOTACHECK: + rc = mdc_poll_quotacheck(exp, (struct if_quotacheck *)karg); + GOTO(out, rc); default: CERROR("mdc_ioctl(): unrecognised ioctl %#x\n", cmd); GOTO(out, rc = -ENOTTY); @@ -1045,6 +1139,8 @@ struct obd_ops mdc_obd_ops = { .o_import_event = mdc_import_event, .o_llog_init = mdc_llog_init, .o_llog_finish = mdc_llog_finish, + .o_quotacheck = mdc_quotacheck, + .o_quotactl = mdc_quotactl, }; int __init mdc_init(void) diff --git a/lustre/mds/Makefile.in b/lustre/mds/Makefile.in index 9063c80..b3b3648 100644 --- a/lustre/mds/Makefile.in +++ b/lustre/mds/Makefile.in @@ -2,4 +2,8 @@ MODULES := mds mds-objs := mds_log.o mds_unlink_open.o mds_lov.o handler.o mds_reint.o mds-objs += mds_fs.o lproc_mds.o mds_open.o mds_lib.o +ifeq ($(PATCHLEVEL),6) +mds-objs += quota_context.o quota_master.o +endif + @INCLUDE_RULES@ diff --git a/lustre/mds/autoMakefile.am b/lustre/mds/autoMakefile.am index b05a4b5..91277b5 100644 --- a/lustre/mds/autoMakefile.am +++ b/lustre/mds/autoMakefile.am @@ -8,4 +8,5 @@ modulefs_DATA = mds$(KMODEXT) endif MOSTLYCLEANFILES = *.o *.ko *.mod.c -DIST_SOURCES = $(mds-objs:%.o=%.c) mds_internal.h +DIST_SOURCES := $(mds-objs:%.o=%.c) mds_internal.h +DIST_SOURCES += quota_context.c quota_master.c diff --git a/lustre/mds/handler.c b/lustre/mds/handler.c index 50853b9..f722340 100644 --- a/lustre/mds/handler.c +++ b/lustre/mds/handler.c @@ -53,9 +53,12 @@ #include #include #include +#include #include "mds_internal.h" +static struct quotacheck_info qchkinfo; + static int mds_intent_policy(struct ldlm_namespace *ns, struct ldlm_lock **lockp, void *req_cookie, ldlm_mode_t mode, int flags, void *data); @@ -1011,6 +1014,205 @@ out: RETURN(0); } +static int mds_quotacheck_callback(struct obd_export *exp, + struct obd_quotactl *oqctl) +{ + struct ptlrpc_request *req; + struct obd_quotactl *body; + int 
rc, size = sizeof(*oqctl); + + req = ptlrpc_prep_req(exp->exp_imp_reverse, OBD_QC_CALLBACK, + 1, &size, NULL); + if (!req) + RETURN(-ENOMEM); + + body = lustre_msg_buf(req->rq_reqmsg, 0, sizeof(*body)); + memcpy(body, oqctl, sizeof(*oqctl)); + + req->rq_replen = lustre_msg_size(0, NULL); + + rc = ptlrpc_queue_wait(req); + ptlrpc_req_finished(req); + + RETURN(rc); +} + + +static int mds_quotacheck_thread(void *data) +{ + unsigned long flags; + struct quotacheck_info *qchki = data; + struct obd_device *obd; + struct obd_export *exp; + struct obd_quotactl *oqctl; + struct obd_run_ctxt saved; + int rc; + + lock_kernel(); + ptlrpc_daemonize(); + + SIGNAL_MASK_LOCK(current, flags); + sigfillset(&current->blocked); /* block every signal for this thread */ + RECALC_SIGPENDING; + SIGNAL_MASK_UNLOCK(current, flags); + + THREAD_NAME(current->comm, sizeof(current->comm) - 1, "%s", "quotacheck"); + unlock_kernel(); + + complete(&qchki->qi_starting); + + exp = qchki->qi_exp; + oqctl = &qchki->qi_oqctl; + obd = exp->exp_obd; + + push_ctxt(&saved, &obd->obd_ctxt, NULL); + + rc = fsfilt_quotacheck(obd, obd->u.mds.mds_sb, oqctl); + if (rc) + CERROR("%s: fsfilt_quotacheck: %d\n", obd->obd_name, rc); + + pop_ctxt(&saved, &obd->obd_ctxt, NULL); + + rc = mds_quotacheck_callback(exp, oqctl); + + atomic_inc(&obd->u.mds.mds_quotachecking); + + return rc; +} + +static int mds_quotacheck(struct ptlrpc_request *req) +{ + struct obd_device *obd = req->rq_export->exp_obd; + struct mds_obd *mds = &obd->u.mds; + struct obd_quotactl *oqctl; + int rc = 0; + ENTRY; + + oqctl = lustre_swab_reqbuf(req, 0, sizeof(*oqctl), + lustre_swab_obd_quotactl); + if (oqctl == NULL) + RETURN(-EPROTO); + + rc = lustre_pack_reply(req, 0, NULL, NULL); + if (rc) { + CERROR("mds: out of memory while packing quotacheck reply\n"); + RETURN(rc); + } + + /* XXX: quotaoff */ + GOTO(out, rc = -EOPNOTSUPP); + + if (!atomic_dec_and_test(&mds->mds_quotachecking)) { + atomic_inc(&mds->mds_quotachecking); + GOTO(out, rc = -EBUSY); + } + + 
init_completion(&qchkinfo.qi_starting); + qchkinfo.qi_exp = req->rq_export; + memcpy(&qchkinfo.qi_oqctl, oqctl, sizeof(*oqctl)); + + rc = init_admin_quotafiles(obd, &qchkinfo.qi_oqctl); + if (rc) { + CERROR("init_admin_quotafiles failed: %d\n", rc); + atomic_inc(&mds->mds_quotachecking); + GOTO(out, rc); + } + + rc = kernel_thread(mds_quotacheck_thread, &qchkinfo, CLONE_VM|CLONE_FILES); + if (rc < 0) { + CERROR("%s: error starting mds_quotacheck_thread: %d\n", + obd->obd_name, rc); + atomic_inc(&mds->mds_quotachecking); + } else { + CDEBUG(D_INFO, "%s: mds_quotacheck_thread: %d\n", + obd->obd_name, rc); + wait_for_completion(&qchkinfo.qi_starting); + rc = 0; + } +out: + req->rq_status = rc; + RETURN(0); +} + +static int mds_quotactl(struct ptlrpc_request *req) +{ + struct obd_device *obd = req->rq_export->exp_obd; + struct obd_quotactl *oqctl, *repoqc; + struct obd_run_ctxt saved; + int rc = 0, size = sizeof(*repoqc); + ENTRY; + + oqctl = lustre_swab_reqbuf(req, 0, sizeof(*oqctl), + lustre_swab_obd_quotactl); + if (oqctl == NULL) + RETURN(-EPROTO); + + rc = lustre_pack_reply(req, 1, &size, NULL); + if (rc) + RETURN(rc); + + /* XXX: quotaoff */ + GOTO(out, rc = -EOPNOTSUPP); + + repoqc = lustre_msg_buf(req->rq_repmsg, 0, sizeof(*repoqc)); + memcpy(repoqc, oqctl, sizeof(*repoqc)); + + switch (repoqc->qc_cmd) { + case Q_QUOTAON: + rc = mds_quota_on(obd, repoqc); + break; + case Q_QUOTAOFF: + mds_quota_off(obd, repoqc); + break; + case Q_SETINFO: + rc = mds_set_dqinfo(obd, repoqc); + break; + case Q_GETINFO: + rc = mds_get_dqinfo(obd, repoqc); + break; + case Q_SETQUOTA: + rc = mds_set_dqblk(obd, repoqc); + break; + case Q_GETQUOTA: + rc = mds_get_dqblk(obd, repoqc); + break; + case Q_GETOINFO: + case Q_GETOQUOTA: + break; + default: + CERROR("%s: unsupported mds_quotactl command: %d\n", + obd->obd_name, repoqc->qc_cmd); + LBUG(); + } + + if (rc) { + CDEBUG(D_INFO, "mds_quotactl admin op failed: rc = %d\n", rc); + GOTO(out, rc); + } + + if (repoqc->qc_cmd == Q_QUOTAON 
|| repoqc->qc_cmd == Q_QUOTAOFF || + Q_GETOCMD(repoqc) || repoqc->qc_cmd == Q_GETQUOTA) { + struct obd_quotactl *loqc = repoqc; + + if (repoqc->qc_cmd == Q_GETQUOTA) + loqc = oqctl; + + push_ctxt(&saved, &obd->obd_ctxt, NULL); + rc = fsfilt_quotactl(obd, obd->u.mds.mds_sb, loqc); + pop_ctxt(&saved, &obd->obd_ctxt, NULL); + + if (!rc && loqc->qc_cmd == Q_GETQUOTA) { + repoqc->qc_dqblk.dqb_curinodes += + loqc->qc_dqblk.dqb_curinodes; + repoqc->qc_dqblk.dqb_curspace += + loqc->qc_dqblk.dqb_curspace; + } + } +out: + req->rq_status = rc; + RETURN(0); +} + int mds_reint(struct ptlrpc_request *req, int offset, struct lustre_handle *lockh) { @@ -1299,6 +1501,18 @@ int mds_handle(struct ptlrpc_request *req) rc = mds_set_info(req->rq_export, req); break; + case MDS_QUOTACHECK: + DEBUG_REQ(D_INODE, req, "quotacheck"); + OBD_FAIL_RETURN(OBD_FAIL_MDS_QUOTACHECK_NET, 0); + rc = mds_quotacheck(req); + break; + + case MDS_QUOTACTL: + DEBUG_REQ(D_INODE, req, "quotactl"); + OBD_FAIL_RETURN(OBD_FAIL_MDS_QUOTACTL_NET, 0); + rc = mds_quotactl(req); + break; + case OBD_PING: DEBUG_REQ(D_INODE, req, "ping"); rc = target_handle_ping(req); @@ -1469,13 +1683,16 @@ static int mds_setup(struct obd_device *obd, obd_count len, void *buf) CDEBUG(D_SUPER, "%s: mnt = %p\n", lustre_cfg_string(lcfg, 1), mnt); LASSERT(!ll_check_rdonly(ll_sbdev(mnt->mnt_sb))); - + + sema_init(&mds->mds_quota_info.qi_sem, 1); sema_init(&mds->mds_orphan_recovery_sem, 1); sema_init(&mds->mds_epoch_sem, 1); spin_lock_init(&mds->mds_transno_lock); mds->mds_max_mdsize = sizeof(struct lov_mds_md); mds->mds_max_cookiesize = sizeof(struct llog_cookie); + atomic_set(&mds->mds_quotachecking, 1); + sprintf(ns_name, "mds-%s", obd->obd_uuid.uuid); obd->obd_namespace = ldlm_namespace_new(ns_name, LDLM_NAMESPACE_SERVER); if (obd->obd_namespace == NULL) { @@ -1542,6 +1759,14 @@ static int mds_setup(struct obd_device *obd, obd_count len, void *buf) obd->obd_replayable ? 
"enabled" : "disabled"); } + sema_init(&mds->mds_quota_info.qi_sem, 1); + rc = qctxt_init(&mds->mds_quota_ctxt, mds->mds_sb, dqacq_handler); + if (rc) { + CERROR("initialize quota context failed! (rc:%d)\n", rc); + qctxt_cleanup(&mds->mds_quota_ctxt, 0); + GOTO(err_fs, rc); + } + RETURN(0); err_fs: @@ -1624,7 +1849,7 @@ int mds_postrecov(struct obd_device *obd) int rc, item = 0; LASSERT(!obd->obd_recovering); - LASSERT(llog_get_context(obd, LLOG_UNLINK_ORIG_CTXT) != NULL); + LASSERT(llog_get_context(obd, LLOG_MDS_OST_ORIG_CTXT) != NULL); /* set nextid first, so we are sure it happens */ rc = mds_lov_set_nextid(obd); @@ -1647,7 +1872,7 @@ int mds_postrecov(struct obd_device *obd) if (rc) GOTO(out, rc); - rc = llog_connect(llog_get_context(obd, LLOG_UNLINK_ORIG_CTXT), + rc = llog_connect(llog_get_context(obd, LLOG_MDS_OST_ORIG_CTXT), obd->u.mds.mds_lov_desc.ld_tgt_count, NULL, NULL, NULL); if (rc) { @@ -1733,6 +1958,8 @@ static int mds_cleanup(struct obd_device *obd) lprocfs_obd_cleanup(obd); + qctxt_cleanup(&mds->mds_quota_ctxt, 0); + mds_update_server_data(obd, 1); if (mds->mds_lov_objids != NULL) { OBD_FREE(mds->mds_lov_objids, @@ -2116,8 +2343,13 @@ static struct obd_ops mdt_obd_ops = { static int __init mds_init(void) { + int rc; struct lprocfs_static_vars lvars; + rc = lustre_dquot_init(); + if (rc) + return rc; + lprocfs_init_vars(mds, &lvars); class_register_type(&mds_obd_ops, lvars.module_vars, LUSTRE_MDS_NAME); lprocfs_init_vars(mdt, &lvars); @@ -2128,6 +2360,8 @@ static int __init mds_init(void) static void /*__exit*/ mds_exit(void) { + lustre_dquot_exit(); + class_unregister_type(LUSTRE_MDS_NAME); class_unregister_type(LUSTRE_MDT_NAME); } diff --git a/lustre/mds/lproc_mds.c b/lustre/mds/lproc_mds.c index 543d296..74795b4 100644 --- a/lustre/mds/lproc_mds.c +++ b/lustre/mds/lproc_mds.c @@ -49,6 +49,121 @@ static int lprocfs_mds_rd_mntdev(char *page, char **start, off_t off, int count, return snprintf(page, count, "%s\n",obd->u.mds.mds_vfsmnt->mnt_devname); 
} +static int lprocfs_mds_rd_bunit(char *page, char **start, off_t off, int count, + int *eof, void *data) +{ + struct obd_device *obd = (struct obd_device *)data; + LASSERT(obd != NULL); + + return snprintf(page, count, "%lu\n", + obd->u.mds.mds_quota_ctxt.lqc_bunit_sz); +} + +static int lprocfs_mds_rd_iunit(char *page, char **start, off_t off, int count, + int *eof, void *data) +{ + struct obd_device *obd = (struct obd_device *)data; + LASSERT(obd != NULL); + + return snprintf(page, count, "%lu\n", + obd->u.mds.mds_quota_ctxt.lqc_iunit_sz); +} + +static int lprocfs_mds_wr_bunit(struct file *file, const char *buffer, + unsigned long count, void *data) +{ + struct obd_device *obd = (struct obd_device *)data; + int val, rc = 0; + LASSERT(obd != NULL); + + rc = lprocfs_write_helper(buffer, count, &val); + if (rc) + return rc; + + if (val % QUOTABLOCK_SIZE || + val <= obd->u.mds.mds_quota_ctxt.lqc_btune_sz) + return -EINVAL; + + obd->u.mds.mds_quota_ctxt.lqc_bunit_sz = val; + return count; +} + +static int lprocfs_mds_wr_iunit(struct file *file, const char *buffer, + unsigned long count, void *data) +{ + struct obd_device *obd = (struct obd_device *)data; + int val, rc = 0; + LASSERT(obd != NULL); + + rc = lprocfs_write_helper(buffer, count, &val); + if (rc) + return rc; + + if (val <= obd->u.mds.mds_quota_ctxt.lqc_itune_sz) + return -EINVAL; + + obd->u.mds.mds_quota_ctxt.lqc_iunit_sz = val; + return count; +} + +static int lprocfs_mds_rd_btune(char *page, char **start, off_t off, int count, + int *eof, void *data) +{ + struct obd_device *obd = (struct obd_device *)data; + LASSERT(obd != NULL); + + return snprintf(page, count, "%lu\n", + obd->u.mds.mds_quota_ctxt.lqc_btune_sz); +} + +static int lprocfs_mds_rd_itune(char *page, char **start, off_t off, int count, + int *eof, void *data) +{ + struct obd_device *obd = (struct obd_device *)data; + LASSERT(obd != NULL); + + return snprintf(page, count, "%lu\n", + obd->u.mds.mds_quota_ctxt.lqc_itune_sz); +} + +static int 
lprocfs_mds_wr_btune(struct file *file, const char *buffer, + unsigned long count, void *data) +{ + struct obd_device *obd = (struct obd_device *)data; + int val, rc = 0; + LASSERT(obd != NULL); + + rc = lprocfs_write_helper(buffer, count, &val); + if (rc) + return rc; + + if (val <= QUOTABLOCK_SIZE * MIN_QLIMIT || val % QUOTABLOCK_SIZE || + val >= obd->u.mds.mds_quota_ctxt.lqc_bunit_sz) + return -EINVAL; + + obd->u.mds.mds_quota_ctxt.lqc_btune_sz = val; + return count; +} + +static int lprocfs_mds_wr_itune(struct file *file, const char *buffer, + unsigned long count, void *data) +{ + struct obd_device *obd = (struct obd_device *)data; + int val, rc = 0; + LASSERT(obd != NULL); + + rc = lprocfs_write_helper(buffer, count, &val); + if (rc) + return rc; + + if (val <= MIN_QLIMIT || + val >= obd->u.mds.mds_quota_ctxt.lqc_iunit_sz) + return -EINVAL; + + obd->u.mds.mds_quota_ctxt.lqc_itune_sz = val; + return count; +} + struct lprocfs_vars lprocfs_mds_obd_vars[] = { { "uuid", lprocfs_rd_uuid, 0, 0 }, { "blocksize", lprocfs_rd_blksize, 0, 0 }, @@ -62,6 +177,10 @@ struct lprocfs_vars lprocfs_mds_obd_vars[] = { { "recovery_status", lprocfs_obd_rd_recovery_status, 0, 0 }, { "evict_client", 0, lprocfs_wr_evict_client, 0 }, { "num_exports", lprocfs_rd_num_exports, 0, 0 }, + { "quota_bunit_sz", lprocfs_mds_rd_bunit, lprocfs_mds_wr_bunit, 0 }, + { "quota_btune_sz", lprocfs_mds_rd_btune, lprocfs_mds_wr_btune, 0 }, + { "quota_iunit_sz", lprocfs_mds_rd_iunit, lprocfs_mds_wr_iunit, 0 }, + { "quota_itune_sz", lprocfs_mds_rd_itune, lprocfs_mds_wr_itune, 0 }, { 0 } }; diff --git a/lustre/mds/mds_fs.c b/lustre/mds/mds_fs.c index ffbf43f..a71a5c2 100644 --- a/lustre/mds/mds_fs.c +++ b/lustre/mds/mds_fs.c @@ -31,6 +31,8 @@ #include #include #include +#include +#include #if (LINUX_VERSION_CODE >= KERNEL_VERSION(2,5,0)) #include #endif @@ -510,7 +512,7 @@ int mds_fs_cleanup(struct obd_device *obd) { struct mds_obd *mds = &obd->u.mds; struct obd_run_ctxt saved; - int rc = 0; + int i, rc = 
0; if (obd->obd_fail) CERROR("%s: shutting down for failover; client state will" @@ -544,9 +546,21 @@ int mds_fs_cleanup(struct obd_device *obd) l_dput(mds->mds_pending_dir); mds->mds_pending_dir = NULL; } + + /* close admin quota files */ + down(&mds->mds_quota_info.qi_sem); + for (i = 0; i < MAXQUOTAS; i++) { + if (mds->mds_quota_info.qi_files[i]) { + filp_close(mds->mds_quota_info.qi_files[i], 0); + mds->mds_quota_info.qi_files[i] = NULL; + } + } + up(&mds->mds_quota_info.qi_sem); + pop_ctxt(&saved, &obd->obd_ctxt, NULL); shrink_dcache_parent(mds->mds_fid_de); dput(mds->mds_fid_de); + DQUOT_OFF(mds->mds_sb); return rc; } @@ -565,13 +579,18 @@ int mds_obd_create(struct obd_export *exp, struct obdo *oa, struct obd_run_ctxt saved; char fidname[LL_FID_NAMELEN]; void *handle; + struct obd_ucred ucred; int rc = 0, err, namelen; ENTRY; - push_ctxt(&saved, &exp->exp_obd->obd_ctxt, NULL); + /* the owner of object file should always be root */ + memset(&ucred, 0, sizeof(ucred)); + ucred.ouc_cap = current->cap_effective | CAP_TO_MASK(CAP_SYS_RESOURCE); /* CAP_* is a bit index, not a mask */ + + push_ctxt(&saved, &exp->exp_obd->obd_ctxt, &ucred); sprintf(fidname, "OBJECTS/%u.%u", tmpname, current->pid); - filp = filp_open(fidname, O_CREAT | O_EXCL, 0644); + filp = filp_open(fidname, O_CREAT | O_EXCL, 0666); if (IS_ERR(filp)) { rc = PTR_ERR(filp); if (rc == -EEXIST) { @@ -632,7 +651,7 @@ out_close: rc = err; } out_pop: - pop_ctxt(&saved, &exp->exp_obd->obd_ctxt, NULL); + pop_ctxt(&saved, &exp->exp_obd->obd_ctxt, &ucred); RETURN(rc); } @@ -643,13 +662,16 @@ int mds_obd_destroy(struct obd_export *exp, struct obdo *oa, struct inode *parent_inode = mds->mds_objects_dir->d_inode; struct obd_device *obd = exp->exp_obd; struct obd_run_ctxt saved; + struct obd_ucred ucred; char fidname[LL_FID_NAMELEN]; struct dentry *de; void *handle; int err, namelen, rc = 0; ENTRY; - - push_ctxt(&saved, &obd->obd_ctxt, NULL); + /* add CAP_SYS_RESOURCE (as a bit mask) to the credentials pushed below */ + memset(&ucred, 0, sizeof(ucred)); + ucred.ouc_cap = current->cap_effective | CAP_TO_MASK(CAP_SYS_RESOURCE); + push_ctxt(&saved, 
&obd->obd_ctxt, &ucred); namelen = ll_fid2str(fidname, oa->o_id, oa->o_generation); @@ -688,6 +710,7 @@ out_dput: if (de != NULL) l_dput(de); up(&parent_inode->i_sem); - pop_ctxt(&saved, &obd->obd_ctxt, NULL); + + pop_ctxt(&saved, &obd->obd_ctxt, &ucred); RETURN(rc); } diff --git a/lustre/mds/mds_internal.h b/lustre/mds/mds_internal.h index 3ca6feb..792d6d0 100644 --- a/lustre/mds/mds_internal.h +++ b/lustre/mds/mds_internal.h @@ -118,6 +118,9 @@ int mds_get_parent_child_locked(struct obd_device *obd, struct mds_obd *mds, struct dentry **dchildp, int child_mode); int mds_lock_new_child(struct obd_device *obd, struct inode *inode, struct lustre_handle *child_lockh); +int mds_osc_setattr_async(struct obd_device *obd, struct inode *inode, + struct lov_mds_md *lmm, int lmm_size, + struct llog_cookie *logcookies); /* mds/mds_lib.c */ int mds_update_unpack(struct ptlrpc_request *, int offset, @@ -131,6 +134,9 @@ int mds_cleanup_orphans(struct obd_device *obd); int mds_log_op_unlink(struct obd_device *obd, struct inode *inode, struct lov_mds_md *lmm, int lmm_size, struct llog_cookie *logcookies, int cookies_size); +int mds_log_op_setattr(struct obd_device *obd, struct inode *inode, + struct lov_mds_md *lmm, int lmm_size, + struct llog_cookie *logcookies, int cookies_size); int mds_llog_init(struct obd_device *obd, struct obd_device *tgt, int count, struct llog_catid *logid); int mds_llog_finish(struct obd_device *obd, int count); @@ -186,4 +192,42 @@ void mds_pack_inode2fid(struct ll_fid *fid, struct inode *inode); void mds_pack_inode2body(struct mds_body *body, struct inode *inode); #endif +/* mds/quota_master.c */ +#if (LINUX_VERSION_CODE >= KERNEL_VERSION(2,5,0)) +int lustre_dquot_init(void); +void lustre_dquot_exit(void); +int dqacq_handler(struct obd_device *obd, struct qunit_data *qdata, int opc); +void mds_adjust_qunit(struct obd_device *obd, uid_t cuid, gid_t cgid, + uid_t puid, gid_t pgid, int rc); +int init_admin_quotafiles(struct obd_device *obd, struct 
obd_quotactl *oqctl); +int mds_quota_on(struct obd_device *obd, struct obd_quotactl *oqctl); +int mds_quota_off(struct obd_device *obd, struct obd_quotactl *oqctl); +int mds_set_dqinfo(struct obd_device *obd, struct obd_quotactl *oqctl); +int mds_get_dqinfo(struct obd_device *obd, struct obd_quotactl *oqctl); +int mds_set_dqblk(struct obd_device *obd, struct obd_quotactl *oqctl); +int mds_get_dqblk(struct obd_device *obd, struct obd_quotactl *oqctl); +#else +static inline int lustre_dquot_init(void) { return 0; } +static inline void lustre_dquot_exit(void) { return; } +static inline int dqacq_handler(struct obd_device *obd, + struct qunit_data *qdata, int opc) {return 0;} +static inline void mds_adjust_qunit(struct obd_device *obd, uid_t cuid, + gid_t cgid, uid_t puid, + gid_t pgid, int rc) { return; } +static inline int init_admin_quotafiles(struct obd_device *obd, + struct obd_quotactl *oqctl) {return 0;} +static inline int mds_quota_on(struct obd_device *obd, + struct obd_quotactl *oqctl) { return 0; } +static inline int mds_quota_off(struct obd_device *obd, + struct obd_quotactl *oqctl) { return 0; } +static inline int mds_set_dqinfo(struct obd_device *obd, + struct obd_quotactl *oqctl) { return 0; } +static inline int mds_get_dqinfo(struct obd_device *obd, + struct obd_quotactl *oqctl) { return 0; } +static inline int mds_set_dqblk(struct obd_device *obd, + struct obd_quotactl *oqctl) { return 0; } +static inline int mds_get_dqblk(struct obd_device *obd, + struct obd_quotactl *oqctl) { return 0; } +#endif /* KERNEL_VERSION(2,5,0) */ + #endif /* _MDS_INTERNAL_H */ diff --git a/lustre/mds/mds_log.c b/lustre/mds/mds_log.c index 9fa802a..9da5623 100644 --- a/lustre/mds/mds_log.c +++ b/lustre/mds/mds_log.c @@ -34,12 +34,38 @@ #include #include #include +#include #include "mds_internal.h" +/* callback function of lov to fill unlink log record */ +static int mds_log_fill_unlink_rec(struct llog_rec_hdr *rec, void *data) +{ + struct llog_fill_rec_data *lfd = (struct 
llog_fill_rec_data *)data; + struct llog_unlink_rec *lur = (struct llog_unlink_rec *)rec; + + lur->lur_oid = lfd->lfd_id; + lur->lur_ogen = lfd->lfd_ogen; + + RETURN(0); +} + +/* callback function of lov to fill setattr log record */ +static int mds_log_fill_setattr_rec(struct llog_rec_hdr *rec, void *data) +{ + struct llog_fill_rec_data *lfd = (struct llog_fill_rec_data *)data; + struct llog_setattr_rec *lsr = (struct llog_setattr_rec *)rec; + + lsr->lsr_oid = lfd->lfd_id; + lsr->lsr_ogen = lfd->lfd_ogen; + + RETURN(0); +} + static int mds_llog_origin_add(struct llog_ctxt *ctxt, struct llog_rec_hdr *rec, struct lov_stripe_md *lsm, - struct llog_cookie *logcookies, int numcookies) + struct llog_cookie *logcookies, int numcookies, + llog_fill_rec_cb_t fill_cb) { struct obd_device *obd = ctxt->loc_obd; struct obd_device *lov_obd = obd->u.mds.mds_osc_obd; @@ -48,7 +74,7 @@ static int mds_llog_origin_add(struct llog_ctxt *ctxt, ENTRY; lctxt = llog_get_context(lov_obd, ctxt->loc_idx); - rc = llog_add(lctxt, rec, lsm, logcookies, numcookies); + rc = llog_add(lctxt, rec, lsm, logcookies, numcookies, fill_cb); RETURN(rc); } @@ -89,6 +115,7 @@ int mds_log_op_unlink(struct obd_device *obd, struct inode *inode, struct mds_obd *mds = &obd->u.mds; struct lov_stripe_md *lsm = NULL; struct llog_ctxt *ctxt; + struct llog_unlink_rec *lur; int rc; ENTRY; @@ -100,16 +127,66 @@ int mds_log_op_unlink(struct obd_device *obd, struct inode *inode, if (rc < 0) RETURN(rc); - ctxt = llog_get_context(obd, LLOG_UNLINK_ORIG_CTXT); - rc = llog_add(ctxt, NULL, lsm, logcookies, - cookies_size / sizeof(struct llog_cookie)); + /* first prepare unlink log record */ + OBD_ALLOC(lur, sizeof(*lur)); + if (!lur) + RETURN(-ENOMEM); + lur->lur_hdr.lrh_len = lur->lur_tail.lrt_len = sizeof(*lur); + lur->lur_hdr.lrh_type = MDS_UNLINK_REC; + + ctxt = llog_get_context(obd, LLOG_MDS_OST_ORIG_CTXT); + rc = llog_add(ctxt, &lur->lur_hdr, lsm, logcookies, + cookies_size / sizeof(struct llog_cookie), + 
mds_log_fill_unlink_rec); + + obd_free_memmd(mds->mds_osc_exp, &lsm); + OBD_FREE(lur, sizeof(*lur)); + + RETURN(rc); +} + +/* Build an MDS_SETATTR_REC carrying the inode's uid/gid and add it to the + * LLOG_MDS_OST_ORIG_CTXT llog. Returns the llog_add() result, or a negative + * errno from an earlier failure (bad osc obd, unpack error, no memory). */ +int mds_log_op_setattr(struct obd_device *obd, struct inode *inode, + struct lov_mds_md *lmm, int lmm_size, + struct llog_cookie *logcookies, int cookies_size) +{ + struct mds_obd *mds = &obd->u.mds; + struct lov_stripe_md *lsm = NULL; + struct llog_ctxt *ctxt; + struct llog_setattr_rec *lsr; + int rc; + ENTRY; + + if (IS_ERR(mds->mds_osc_obd)) + RETURN(PTR_ERR(mds->mds_osc_obd)); + + rc = obd_unpackmd(mds->mds_osc_exp, &lsm, + lmm, lmm_size); + if (rc < 0) + RETURN(rc); + + OBD_ALLOC(lsr, sizeof(*lsr)); + if (!lsr) { + /* don't leak the stripe md unpacked just above */ + obd_free_memmd(mds->mds_osc_exp, &lsm); + RETURN(-ENOMEM); + } + + /* prepare setattr log record */ + lsr->lsr_hdr.lrh_len = lsr->lsr_tail.lrt_len = sizeof(*lsr); + lsr->lsr_hdr.lrh_type = MDS_SETATTR_REC; + lsr->lsr_uid = inode->i_uid; + lsr->lsr_gid = inode->i_gid; + + /* write setattr log */ + ctxt = llog_get_context(obd, LLOG_MDS_OST_ORIG_CTXT); + rc = llog_add(ctxt, &lsr->lsr_hdr, lsm, logcookies, + cookies_size / sizeof(struct llog_cookie), + mds_log_fill_setattr_rec); obd_free_memmd(mds->mds_osc_exp, &lsm); + OBD_FREE(lsr, sizeof(*lsr)); RETURN(rc); } -static struct llog_operations mds_unlink_orig_logops = { +static struct llog_operations mds_ost_orig_logops = { lop_add: mds_llog_origin_add, lop_connect: mds_llog_origin_connect, }; @@ -125,8 +202,8 @@ int mds_llog_init(struct obd_device *obd, struct obd_device *tgt, int rc; ENTRY; - rc = llog_setup(obd, LLOG_UNLINK_ORIG_CTXT, tgt, 0, NULL, - &mds_unlink_orig_logops); + rc = llog_setup(obd, LLOG_MDS_OST_ORIG_CTXT, tgt, 0, NULL, + &mds_ost_orig_logops); if (rc) RETURN(rc); @@ -148,7 +225,7 @@ int mds_llog_finish(struct obd_device *obd, int count) int rc = 0, rc2 = 0; ENTRY; - ctxt = llog_get_context(obd, LLOG_UNLINK_ORIG_CTXT); + ctxt = llog_get_context(obd, LLOG_MDS_OST_ORIG_CTXT); if (ctxt) rc = llog_cleanup(ctxt); diff --git a/lustre/mds/mds_lov.c b/lustre/mds/mds_lov.c index 740ca84..90f24bc4 100644 ---
a/lustre/mds/mds_lov.c +++ b/lustre/mds/mds_lov.c @@ -529,7 +529,7 @@ int mds_lov_synchronize(void *data) if (rc != 0) RETURN(rc); - rc = llog_connect(llog_get_context(obd, LLOG_UNLINK_ORIG_CTXT), + rc = llog_connect(llog_get_context(obd, LLOG_MDS_OST_ORIG_CTXT), obd->u.mds.mds_lov_desc.ld_tgt_count, NULL, NULL, uuid); if (rc != 0) { @@ -597,8 +597,8 @@ int mds_notify(struct obd_device *obd, struct obd_device *watched, int active) CWARN("MDS %s: in recovery, not resetting orphans on %s\n", obd->obd_name, uuid->uuid); } else { - LASSERT(llog_get_context(obd, LLOG_UNLINK_ORIG_CTXT) != NULL); - + LASSERT(llog_get_context(obd, LLOG_MDS_OST_ORIG_CTXT) != NULL); + rc = mds_lov_start_synchronize(obd, uuid); } RETURN(rc); diff --git a/lustre/mds/mds_open.c b/lustre/mds/mds_open.c index cf9d0144..b2b9d60 100644 --- a/lustre/mds/mds_open.c +++ b/lustre/mds/mds_open.c @@ -295,8 +295,9 @@ cleanup_dentry: static int mds_create_objects(struct ptlrpc_request *req, int offset, struct mds_update_record *rec, struct mds_obd *mds, struct obd_device *obd, - struct dentry *dchild, void **handle, - obd_id **ids) + struct dentry *dchild, void **handle, obd_id **ids, + struct llog_cookie **ret_logcookies, + int *setattr_async_flag) { struct obdo *oa; struct obd_trans_info oti = { 0 }; @@ -414,6 +415,7 @@ static int mds_create_objects(struct ptlrpc_request *req, int offset, } GOTO(out_oa, rc); } + *setattr_async_flag = 1; } else { rc = obd_iocontrol(OBD_IOC_LOV_SETEA, mds->mds_osc_exp, 0, &lsm, rec->ur_eadata); @@ -448,14 +450,35 @@ static int mds_create_objects(struct ptlrpc_request *req, int offset, lmm_size = rc; body->eadatasize = rc; - if (*handle == NULL) - *handle = fsfilt_start(obd, inode, FSFILT_OP_CREATE, NULL); + if (*handle == NULL) { + if (*setattr_async_flag) + *handle = fsfilt_start_log(obd, inode, + FSFILT_OP_CREATE, NULL, + le32_to_cpu(lmm->lmm_stripe_count)); + else + *handle = fsfilt_start(obd, inode, FSFILT_OP_CREATE, NULL); + } if (IS_ERR(*handle)) { rc = 
PTR_ERR(*handle); *handle = NULL; GOTO(out_oa, rc); } + /* write mds setattr log for created objects */ + if (*setattr_async_flag && lmm_size) { + struct llog_cookie *logcookies = NULL; + + OBD_ALLOC(logcookies, mds->mds_max_cookiesize); + if (logcookies == NULL) + GOTO(out_oa, rc = -ENOMEM); + *ret_logcookies = logcookies; + if (mds_log_op_setattr(obd, inode, lmm, lmm_size, logcookies, + mds->mds_max_cookiesize) <= 0) { + OBD_FREE(logcookies, mds->mds_max_cookiesize); + *ret_logcookies = NULL; + } + } + rc = fsfilt_set_md(obd, inode, *handle, lmm, lmm_size); lmm_buf = lustre_msg_buf(req->rq_repmsg, offset, 0); lmm_bufsize = req->rq_repmsg->buflens[offset]; @@ -622,7 +645,9 @@ static int accmode(struct inode *inode, int flags) /* Handles object creation, actual opening, and I/O epoch */ static int mds_finish_open(struct ptlrpc_request *req, struct dentry *dchild, struct mds_body *body, int flags, void **handle, - struct mds_update_record *rec,struct ldlm_reply *rep) + struct mds_update_record *rec,struct ldlm_reply *rep, + struct llog_cookie **logcookies, + int *setattr_async_flag) { struct mds_obd *mds = mds_req2mds(req); struct obd_device *obd = req->rq_export->exp_obd; @@ -652,7 +677,8 @@ static int mds_finish_open(struct ptlrpc_request *req, struct dentry *dchild, if (!(body->valid & OBD_MD_FLEASIZE)) { /* no EA: create objects */ rc = mds_create_objects(req, 2, rec, mds, obd, - dchild, handle, &ids); + dchild, handle, &ids, + logcookies, setattr_async_flag); if (rc) { CERROR("mds_create_objects: rc = %d\n", rc); up(&dchild->d_inode->i_sem); @@ -688,11 +714,16 @@ static int mds_open_by_fid(struct ptlrpc_request *req, struct ll_fid *fid, struct mds_body *body, int flags, struct mds_update_record *rec,struct ldlm_reply *rep) { + struct obd_device *obd = req->rq_export->exp_obd; struct mds_obd *mds = mds_req2mds(req); struct dentry *dchild; char fidname[LL_FID_NAMELEN]; int fidlen = 0, rc; void *handle = NULL; + struct llog_cookie *logcookies = NULL; + struct 
lov_mds_md *lmm = NULL; + int lmm_size = 0; + int setattr_async_flag = 0; ENTRY; fidlen = ll_fid2str(fidname, fid->id, fid->generation); @@ -727,9 +758,17 @@ static int mds_open_by_fid(struct ptlrpc_request *req, struct ll_fid *fid, intent_set_disposition(rep, DISP_LOOKUP_POS); open: - rc = mds_finish_open(req, dchild, body, flags, &handle, rec, rep); + rc = mds_finish_open(req, dchild, body, flags, &handle, rec, rep, + &logcookies, &setattr_async_flag); rc = mds_finish_transno(mds, dchild ? dchild->d_inode : NULL, handle, req, rc, rep ? rep->lock_policy_res1 : 0); + /* do mds to ost setattr for new created objects */ + if (rc == 0 && setattr_async_flag) { + lmm = lustre_msg_buf(req->rq_repmsg, 2, 0); + lmm_size = req->rq_repmsg->buflens[2]; + rc = mds_osc_setattr_async(obd, dchild->d_inode, lmm, lmm_size, + logcookies); + } /* XXX what do we do here if mds_finish_transno itself failed? */ l_dput(dchild); @@ -800,6 +839,12 @@ int mds_open(struct mds_update_record *rec, int offset, int parent_mode = LCK_PR; void *handle = NULL; struct dentry_params dp; + struct lov_mds_md *lmm = NULL; + int lmm_size = 0; + struct llog_cookie *logcookies = NULL; + int setattr_async_flag = 0; + uid_t parent_uid = 0; + gid_t parent_gid = 0; ENTRY; if (offset == 2) { /* intent */ @@ -1028,12 +1073,20 @@ int mds_open(struct mds_update_record *rec, int offset, /* Step 5: mds_open it */ rc = mds_finish_open(req, dchild, body, rec->ur_flags, &handle, rec, - rep); + rep, &logcookies, &setattr_async_flag); GOTO(cleanup, rc); cleanup: rc = mds_finish_transno(mds, dchild ? dchild->d_inode : NULL, handle, req, rc, rep ? 
rep->lock_policy_res1 : 0); + /* do mds to ost setattr for new created objects */ + if (rc == 0 && setattr_async_flag) { + lmm = lustre_msg_buf(req->rq_repmsg, 2, 0); + lmm_size = req->rq_repmsg->buflens[2]; + mds_osc_setattr_async(obd, dchild->d_inode, lmm, lmm_size, + logcookies); + } + cleanup_no_trans: switch (cleanup_phase) { case 2: @@ -1046,6 +1099,10 @@ int mds_open(struct mds_update_record *rec, int offset, } } else if (created) { mds_lock_new_child(obd, dchild->d_inode, NULL); + /* save uid/gid for quota acquire/release */ + parent_uid = dparent->d_inode->i_uid; + parent_gid = dparent->d_inode->i_gid; + } l_dput(dchild); case 1: @@ -1058,6 +1115,10 @@ int mds_open(struct mds_update_record *rec, int offset, else ptlrpc_save_lock (req, &parent_lockh, parent_mode); } + + /* trigger dqacq on the owner of child and parent */ + mds_adjust_qunit(obd, current->fsuid, current->fsgid, + parent_uid, parent_gid, rc); RETURN(rc); } diff --git a/lustre/mds/mds_reint.c b/lustre/mds/mds_reint.c index bba0365..db4cd58 100644 --- a/lustre/mds/mds_reint.c +++ b/lustre/mds/mds_reint.c @@ -363,6 +363,64 @@ static void reconstruct_reint_setattr(struct mds_update_record *rec, l_dput(de); } +/* Push the inode's uid/gid to the OST objects described by @lmm with + * obd_setattr_async(). @logcookies, if non-NULL, are handed to the request + * and are freed here on every exit through the cleanup label, so callers + * must not free them afterwards. Returns 0 or a negative errno. */ +int mds_osc_setattr_async(struct obd_device *obd, struct inode *inode, + struct lov_mds_md *lmm, int lmm_size, + struct llog_cookie *logcookies) +{ + struct mds_obd *mds = &obd->u.mds; + struct lov_stripe_md *lsm = NULL; + struct obd_trans_info oti = { 0 }; + struct obdo *oa = NULL; + int cleanup_phase = 0, rc = 0; + ENTRY; + + if (OBD_FAIL_CHECK(OBD_FAIL_MDS_OST_SETATTR)) + GOTO(cleanup, rc); + + /* first get memory EA */ + oa = obdo_alloc(); + if (!oa) + GOTO(cleanup, rc = -ENOMEM); /* phase 0: still frees logcookies */ + + LASSERT(lmm); + + cleanup_phase = 1; + rc = obd_unpackmd(mds->mds_osc_exp, &lsm, lmm, lmm_size); + if (rc < 0) { + CERROR("Error unpack md %p\n", lmm); + GOTO(cleanup, rc); + } + + cleanup_phase = 2; + /* then fill oa */ + oa->o_id = lsm->lsm_object_id; + oa->o_uid = inode->i_uid; + oa->o_gid = inode->i_gid; +
oa->o_valid = OBD_MD_FLID | OBD_MD_FLUID | OBD_MD_FLGID; + if (logcookies) { + oa->o_valid |= OBD_MD_FLCOOKIE; + oti.oti_logcookies = logcookies; + } + + /* do setattr from mds to ost asynchronously */ + rc = obd_setattr_async(mds->mds_osc_exp, oa, lsm, &oti); + if (rc) + CDEBUG(D_INODE, "mds to ost setattr objid 0x"LPX64" on ost error " + "%d\n", lsm->lsm_object_id, rc); +cleanup: + switch(cleanup_phase) { + case 2: + obd_free_memmd(mds->mds_osc_exp, &lsm); + case 1: + obdo_free(oa); + case 0: + if (logcookies) + OBD_FREE(logcookies, mds->mds_max_cookiesize); + } + + RETURN(rc); +} + /* In the raw-setattr case, we lock the child inode. * In the write-back case or if being called from open, the client holds a lock * already. @@ -380,7 +438,12 @@ static int mds_reint_setattr(struct mds_update_record *rec, int offset, struct lustre_handle lockh; void *handle = NULL; struct mds_logcancel_data *mlcd = NULL; + struct lov_mds_md *lmm = NULL; + struct llog_cookie *logcookies = NULL; + int lmm_size = 0, need_lock = 1; int rc = 0, cleanup_phase = 0, err, locked = 0; + uid_t child_uid = 0; + gid_t child_gid = 0; ENTRY; LASSERT(offset == 0); @@ -408,13 +471,37 @@ static int mds_reint_setattr(struct mds_update_record *rec, int offset, cleanup_phase = 1; inode = de->d_inode; LASSERT(inode); + + /* save uid/gid for quota acq/rel */ + child_uid = inode->i_uid; + child_gid = inode->i_gid; + if ((S_ISREG(inode->i_mode) || S_ISDIR(inode->i_mode)) && - rec->ur_eadata != NULL) + rec->ur_eadata != NULL) { down(&inode->i_sem); + need_lock = 0; + } OBD_FAIL_WRITE(OBD_FAIL_MDS_REINT_SETATTR_WRITE, inode->i_sb); - handle = fsfilt_start(obd, inode, FSFILT_OP_SETATTR, NULL); + /* start a log jounal handle if needed*/ + if (S_ISREG(inode->i_mode) && + rec->ur_iattr.ia_valid & (ATTR_UID | ATTR_GID)) { + lmm_size = mds->mds_max_mdsize; + OBD_ALLOC(lmm, lmm_size); + if (lmm == NULL) + GOTO(cleanup, rc = -ENOMEM); + + cleanup_phase = 2; + rc = mds_get_md(obd, inode, lmm, &lmm_size, need_lock); + 
if (rc < 0) + GOTO(cleanup, rc); + + handle = fsfilt_start_log(obd, inode, FSFILT_OP_SETATTR, NULL, + le32_to_cpu(lmm->lmm_stripe_count)); + } else { + handle = fsfilt_start(obd, inode, FSFILT_OP_SETATTR, NULL); + } if (IS_ERR(handle)) GOTO(cleanup, rc = PTR_ERR(handle)); @@ -429,8 +516,22 @@ static int mds_reint_setattr(struct mds_update_record *rec, int offset, if (rec->ur_iattr.ia_valid & ATTR_ATTR_FLAG) /* ioctl */ rc = fsfilt_iocontrol(obd, inode, NULL, EXT3_IOC_SETFLAGS, (long)&rec->ur_iattr.ia_attr_flags); - else /* setattr */ + else { /* setattr */ rc = fsfilt_setattr(obd, de, handle, &rec->ur_iattr, 0); + /* journal chown/chgrp in llog, just like unlink */ + if (rc == 0 && S_ISREG(inode->i_mode) && + rec->ur_iattr.ia_valid & (ATTR_UID | ATTR_GID) && lmm_size){ + OBD_ALLOC(logcookies, mds->mds_max_cookiesize); + if (logcookies == NULL) + GOTO(cleanup, rc = -ENOMEM); + if (mds_log_op_setattr(obd, inode, lmm, lmm_size, + logcookies, + mds->mds_max_cookiesize) <= 0) { + OBD_FREE(logcookies, mds->mds_max_cookiesize); + logcookies = NULL; + } + } + } if (rc == 0 && (S_ISREG(inode->i_mode) || S_ISDIR(inode->i_mode)) && rec->ur_eadata != NULL) { @@ -501,7 +602,13 @@ static int mds_reint_setattr(struct mds_update_record *rec, int offset, fsfilt_add_journal_cb(req->rq_export->exp_obd, 0, handle, mds_cancel_cookies_cb, mlcd); err = mds_finish_transno(mds, inode, handle, req, rc, 0); + /* do mds to ost setattr if needed */ + if (!rc && !err && lmm_size) + mds_osc_setattr_async(obd, inode, lmm, lmm_size, logcookies); + switch (cleanup_phase) { + case 2: + OBD_FREE(lmm, mds->mds_max_mdsize); case 1: if ((S_ISREG(inode->i_mode) || S_ISDIR(inode->i_mode)) && rec->ur_eadata != NULL) @@ -523,6 +630,13 @@ static int mds_reint_setattr(struct mds_update_record *rec, int offset, rc = err; req->rq_status = rc; + + /* trigger dqrel/dqacq for original owner and new owner */ + if (rec->ur_iattr.ia_valid & (ATTR_UID | ATTR_GID)) { + mds_adjust_qunit(obd, rec->ur_iattr.ia_uid, + 
rec->ur_iattr.ia_gid, 0, 0, rc); + mds_adjust_qunit(obd, child_uid, child_gid, 0, 0, rc); + } return 0; } @@ -563,6 +677,8 @@ static int mds_reint_create(struct mds_update_record *rec, int offset, struct lustre_handle lockh; int rc = 0, err, type = rec->ur_mode & S_IFMT, cleanup_phase = 0; int created = 0; + uid_t parent_uid = 0; + gid_t parent_gid = 0; struct dentry_params dp; ENTRY; @@ -766,9 +882,14 @@ cleanup: CERROR("unlink in error path: %d\n", err); break; } + } else if (created) { + /* save uid/gid of create inode and parent */ + parent_uid = dir->i_uid; + parent_gid = dir->i_gid; } else { rc = err; } + switch (cleanup_phase) { case 2: /* child dentry */ l_dput(dchild); @@ -786,6 +907,10 @@ cleanup: LBUG(); } req->rq_status = rc; + + /* trigger dqacq on the owner of child and parent */ + mds_adjust_qunit(obd, current->fsuid, current->fsgid, + parent_uid, parent_gid, rc); return 0; } @@ -1241,6 +1366,8 @@ static int mds_reint_unlink(struct mds_update_record *rec, int offset, struct lustre_handle parent_lockh, child_lockh, child_reuse_lockh; void *handle = NULL; int rc = 0, cleanup_phase = 0; + uid_t child_uid = 0, parent_uid = 0; + gid_t child_gid = 0, parent_gid = 0; ENTRY; LASSERT(offset == 0 || offset == 2); @@ -1270,6 +1397,12 @@ static int mds_reint_unlink(struct mds_update_record *rec, int offset, GOTO(cleanup, rc = -ENOENT); } + /* save uid/gid for quota acquire/release */ + child_uid = child_inode->i_uid; + child_gid = child_inode->i_gid; + parent_uid = dparent->d_inode->i_uid; + parent_gid = dparent->d_inode->i_gid; + cleanup_phase = 2; /* dchild has a lock */ /* We have to do these checks ourselves, in case we are making an @@ -1440,6 +1573,9 @@ cleanup: LBUG(); } req->rq_status = rc; + + /* trigger dqrel on the owner of child and parent */ + mds_adjust_qunit(obd, child_uid, child_gid, parent_uid, parent_gid, rc); return 0; } diff --git a/lustre/mds/quota_context.c b/lustre/mds/quota_context.c new file mode 100644 index 0000000..907255a --- 
/dev/null +++ b/lustre/mds/quota_context.c @@ -0,0 +1,588 @@ +/* -*- mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*- + * vim:expandtab:shiftwidth=8:tabstop=8: + * + * lustre/mds/quota_context.c + * Lustre Quota Context + * + * Copyright (c) 2001-2003 Cluster File Systems, Inc. + * Author: Niu YaWei + * + * This file is part of Lustre, http://www.lustre.org. + * + * Lustre is free software; you can redistribute it and/or + * modify it under the terms of version 2 of the GNU General Public + * License as published by the Free Software Foundation. + * + * Lustre is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with Lustre; if not, write to the Free Software + * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. + */ +#ifndef EXPORT_SYMTAB +# define EXPORT_SYMTAB +#endif + +#define DEBUG_SUBSYSTEM S_MDS + +#include +#include +#include +#include +#include +#include +#include + +#include +#include +#include + +const unsigned long default_bunit_sz = 100 * 1024 * 1024; /* 100M bytes */ +const unsigned long default_btune_sz = 50 * 1024 * 1024; /* 50M bytes */ +const unsigned long default_iunit_sz = 5000; /* 5000 inodes */ +const unsigned long default_itune_sz = 2500; /* 2500 inodes */ + +static inline int const +qunit_hashfn(struct lustre_quota_ctxt *qctxt, struct qunit_data *qdata) +{ + unsigned int id = qdata->qd_id; + unsigned int type = qdata->qd_type; + + unsigned long tmp = ((unsigned long)qctxt >> L1_CACHE_SHIFT) ^ id; + tmp = (tmp * (MAXQUOTAS - type)) % NR_DQHASH; + return tmp; +} + +static inline struct lustre_qunit *find_qunit(unsigned int hashent, + struct lustre_quota_ctxt *qctxt, + struct qunit_data *qdata) +{ + struct list_head *pos; + struct lustre_qunit *qunit = NULL; + struct 
qunit_data *tmp; + + list_for_each(pos, qunit_hash + hashent) { + qunit = list_entry(pos, struct lustre_qunit, lq_hash); + tmp = &qunit->lq_data; + if (qunit->lq_ctxt == qctxt && + qdata->qd_id == tmp->qd_id && qdata->qd_type == tmp->qd_type + && qdata->qd_isblk == tmp->qd_isblk) + return qunit; + } + return NULL; +} + +/* check_cur_qunit - check the current usage of qunit. + * @qctxt: quota context + * @qdata: the type of quota unit to be checked + * + * return: 1 - need acquire qunit; + * 2 - need release qunit; + * 0 - need do nothing. + * < 0 - error. + */ +static int +check_cur_qunit(struct obd_device *obd, + struct lustre_quota_ctxt *qctxt, struct qunit_data *qdata) +{ + struct super_block *sb = qctxt->lqc_sb; + unsigned long qunit_sz, tune_sz; + __u64 usage, limit; + struct obd_quotactl *qctl = NULL; + int ret = 0; + ENTRY; + + if (!sb_any_quota_enabled(sb)) + RETURN(0); + + /* ignore root user */ + if (qdata->qd_id == 0 && qdata->qd_type == USRQUOTA) + RETURN(0); + + OBD_ALLOC(qctl, sizeof(*qctl)); + if (qctl == NULL) + RETURN(-ENOMEM); + + /* get fs quota usage & limit */ + qctl->qc_cmd = Q_GETQUOTA; + qctl->qc_id = qdata->qd_id; + qctl->qc_type = qdata->qd_type; + ret = fsfilt_quotactl(obd, sb, qctl); + if (ret) { + if (ret == -ESRCH) /* no limit */ + ret = 0; + else + CERROR("can't get fs quota usage! 
(rc:%d)\n", ret); + GOTO(out, ret); + } + + if (qdata->qd_isblk) { + usage = qctl->qc_dqblk.dqb_curspace; + limit = qctl->qc_dqblk.dqb_bhardlimit; + qunit_sz = qctxt->lqc_bunit_sz; + tune_sz = qctxt->lqc_btune_sz; + + LASSERT(!(qunit_sz % QUOTABLOCK_SIZE)); + LASSERT(limit == MIN_QLIMIT + || !((__u32) limit % toqb(qunit_sz))); + limit = limit << QUOTABLOCK_BITS; + } else { + usage = qctl->qc_dqblk.dqb_curinodes; + limit = qctl->qc_dqblk.dqb_ihardlimit; + qunit_sz = qctxt->lqc_iunit_sz; + tune_sz = qctxt->lqc_itune_sz; + } + + /* if it's not first time to set quota, ignore the no quota limit + * case */ + if (!limit) + GOTO(out, ret = 0); + + /* we don't count the MIN_QLIMIT */ + if ((limit == MIN_QLIMIT && !qdata->qd_isblk) || + (toqb(limit) == MIN_QLIMIT && qdata->qd_isblk)) + limit = 0; + + LASSERT(qdata->qd_count == 0); + if (limit <= usage + tune_sz) { + while (qdata->qd_count + limit <= usage + tune_sz) + qdata->qd_count += qunit_sz; + ret = 1; + } else if (limit > usage + qunit_sz + tune_sz) { + while (limit - qdata->qd_count > usage + qunit_sz + tune_sz) + qdata->qd_count += qunit_sz; + ret = 2; + } + LASSERT(ret == 0 || qdata->qd_count); +out: + OBD_FREE(qctl, sizeof(*qctl)); + RETURN(ret); +} + +/* must hold qctxt->lqc_qunit_lock */ +static struct lustre_qunit *dqacq_in_flight(struct lustre_quota_ctxt *qctxt, + struct qunit_data *qdata) +{ + unsigned int hashent = qunit_hashfn(qctxt, qdata); + struct lustre_qunit *qunit = NULL; + ENTRY; + + qunit = find_qunit(hashent, qctxt, qdata); + RETURN(qunit); +} + +static struct lustre_qunit *alloc_qunit(struct lustre_quota_ctxt *qctxt, + struct qunit_data *qdata, int opc) +{ + struct lustre_qunit *qunit = NULL; + ENTRY; + + OBD_SLAB_ALLOC(qunit, qunit_cachep, SLAB_NOFS, sizeof(*qunit)); + if (qunit == NULL) + RETURN(NULL); + + INIT_LIST_HEAD(&qunit->lq_hash); + INIT_LIST_HEAD(&qunit->lq_waiters); + atomic_set(&qunit->lq_refcnt, 1); + qunit->lq_ctxt = qctxt; + memcpy(&qunit->lq_data, qdata, sizeof(*qdata)); + 
qunit->lq_opc = opc; + + RETURN(qunit); +} + +static inline void free_qunit(struct lustre_qunit *qunit) +{ + OBD_SLAB_FREE(qunit, qunit_cachep, sizeof(*qunit)); +} + +static inline void qunit_get(struct lustre_qunit *qunit) +{ + atomic_inc(&qunit->lq_refcnt); +} + +static void qunit_put(struct lustre_qunit *qunit) +{ + LASSERT(atomic_read(&qunit->lq_refcnt)); + if (atomic_dec_and_test(&qunit->lq_refcnt)) + free_qunit(qunit); +} + +static void +insert_qunit_nolock(struct lustre_quota_ctxt *qctxt, struct lustre_qunit *qunit) +{ + struct list_head *head; + + head = qunit_hash + qunit_hashfn(qctxt, &qunit->lq_data); + list_add(&qunit->lq_hash, head); +} + +static void remove_qunit_nolock(struct lustre_qunit *qunit) +{ + LASSERT(!list_empty(&qunit->lq_hash)); + list_del_init(&qunit->lq_hash); +} + +struct qunit_waiter { + struct list_head qw_entry; + wait_queue_head_t qw_waitq; + int qw_rc; +}; + +#define QDATA_DEBUG(qd, fmt, arg...) \ + CDEBUG(D_QUOTA, "id(%u) type(%u) count(%u) isblk(%u):" \ + fmt, qd->qd_id, qd->qd_type, qd->qd_count, qd->qd_isblk, \ + ## arg); \ + +#define INC_QLIMIT(limit, count) (limit == MIN_QLIMIT) ? \ + (limit = count) : (limit += count) + +static int +dqacq_completion(struct obd_device *obd, + struct lustre_quota_ctxt *qctxt, + struct qunit_data *qdata, int rc, int opc) +{ + struct lustre_qunit *qunit = NULL; + struct super_block *sb = qctxt->lqc_sb; + unsigned long qunit_sz; + struct list_head *pos, *tmp; + int err = 0; + ENTRY; + + LASSERT(qdata); + qunit_sz = + (qdata->qd_isblk) ? 
qctxt->lqc_bunit_sz : qctxt->lqc_iunit_sz; + LASSERT(!(qdata->qd_count % qunit_sz)); + + /* update local operational quota file */ + if (rc == 0) { + __u32 count = QUSG(qdata->qd_count, qdata->qd_isblk); + struct obd_quotactl *qctl = NULL; + + OBD_ALLOC(qctl, sizeof(*qctl)); + if (qctl == NULL) + GOTO(out, err = -ENOMEM); + + /* acq/rel qunit for specified uid/gid is serialized, + * so there is no race between get fs quota limit and + * set fs quota limit */ + qctl->qc_cmd = Q_GETQUOTA; + qctl->qc_id = qdata->qd_id; + qctl->qc_type = qdata->qd_type; + err = fsfilt_quotactl(obd, sb, qctl); + if (err) { + CERROR("error get quota fs limit! (rc:%d)\n", err); + GOTO(out_mem, err); + } + + switch (opc) { + case QUOTA_DQACQ: + if (qdata->qd_isblk) { + qctl->qc_dqblk.dqb_valid = QIF_BLIMITS; + INC_QLIMIT(qctl->qc_dqblk.dqb_bhardlimit, + count); + } else { + qctl->qc_dqblk.dqb_valid = QIF_ILIMITS; + INC_QLIMIT(qctl->qc_dqblk.dqb_ihardlimit, + count); + } + break; + case QUOTA_DQREL: + if (qdata->qd_isblk) { + LASSERT(count < qctl->qc_dqblk.dqb_bhardlimit); + qctl->qc_dqblk.dqb_valid = QIF_BLIMITS; + qctl->qc_dqblk.dqb_bhardlimit -= count; + } else { + LASSERT(count < qctl->qc_dqblk.dqb_ihardlimit); + qctl->qc_dqblk.dqb_valid = QIF_ILIMITS; + qctl->qc_dqblk.dqb_ihardlimit -= count; + } + break; + default: + LBUG(); + break; + } + + /* clear quota limit */ + if (count == 0) { + if (qdata->qd_isblk) + qctl->qc_dqblk.dqb_bhardlimit = 0; + else + qctl->qc_dqblk.dqb_ihardlimit = 0; + } + + qctl->qc_cmd = Q_SETQUOTA; + err = fsfilt_quotactl(obd, sb, qctl); + if (err) + CERROR("error set quota fs limit! (rc:%d)\n", err); + + QDATA_DEBUG(qdata, "%s completion\n", + opc == QUOTA_DQACQ ? "DQACQ" : "DQREL"); +out_mem: + OBD_FREE(qctl, sizeof(*qctl)); + } else if (rc == -EDQUOT) { + CWARN("acquire qunit got EDQUOT\n"); + } else { + CERROR("acquire qunit got error! 
(rc:%d)\n", rc); + } +out: + /* remove the qunit from hash */ + spin_lock(&qunit_hash_lock); + + qunit = dqacq_in_flight(qctxt, qdata); + + LASSERT(qunit); + LASSERT(opc == qunit->lq_opc); + remove_qunit_nolock(qunit); + + /* wake up all waiters */ + list_for_each_safe(pos, tmp, &qunit->lq_waiters) { + struct qunit_waiter *qw = list_entry(pos, struct qunit_waiter, + qw_entry); + list_del_init(&qw->qw_entry); + qw->qw_rc = rc; + wake_up(&qw->qw_waitq); + } + + spin_unlock(&qunit_hash_lock); + + qunit_put(qunit); + RETURN(err); +} + +struct dqacq_async_args { + struct lustre_quota_ctxt *aa_ctxt; + struct lustre_qunit *aa_qunit; +}; + +/* Reply callback for an asynchronous DQACQ/DQREL request: checks the reply + * against the qunit that was sent and hands the result to + * dqacq_completion(), whose return value becomes this callback's result. */ +static int dqacq_interpret(struct ptlrpc_request *req, void *data, int rc) +{ + struct dqacq_async_args *aa = (struct dqacq_async_args *)data; + struct lustre_quota_ctxt *qctxt = aa->aa_ctxt; + struct lustre_qunit *qunit = aa->aa_qunit; + struct obd_device *obd = req->rq_import->imp_obd; + struct qunit_data *qdata = NULL; + ENTRY; + + qdata = lustre_swab_repbuf(req, 0, sizeof(*qdata), lustre_swab_qdata); + if (qdata == NULL) { + /* No usable reply body. Fall back to the data we sent so the + * LASSERT below cannot dereference NULL and the completion + * path still unhashes the qunit and wakes its waiters. */ + if (rc == 0) + rc = -EPROTO; + qdata = &qunit->lq_data; + } + + LASSERT(qdata->qd_id == qunit->lq_data.qd_id && + qdata->qd_type == qunit->lq_data.qd_type && + (qdata->qd_count == qunit->lq_data.qd_count || + qdata->qd_count == 0)); + + QDATA_DEBUG(qdata, "%s interpret rc(%d).\n", + req->rq_reqmsg->opc == QUOTA_DQACQ ?
"DQACQ" : "DQREL", rc); + + rc = dqacq_completion(obd, qctxt, qdata, rc, req->rq_reqmsg->opc); + + RETURN(rc); +} + +static int got_qunit(struct qunit_waiter *waiter) +{ + int rc = 0; + ENTRY; + spin_lock(&qunit_hash_lock); + rc = list_empty(&waiter->qw_entry); + spin_unlock(&qunit_hash_lock); + RETURN(rc); +} + +static int +schedule_dqacq(struct obd_device *obd, + struct lustre_quota_ctxt *qctxt, + struct qunit_data *qdata, int opc, int wait) +{ + struct lustre_qunit *qunit = NULL; + struct qunit_waiter qw; + struct l_wait_info lwi = { 0 }; + int rc = 0; + ENTRY; + + INIT_LIST_HEAD(&qw.qw_entry); + init_waitqueue_head(&qw.qw_waitq); + qw.qw_rc = 0; + + spin_lock(&qunit_hash_lock); + + qunit = dqacq_in_flight(qctxt, qdata); + if (qunit && wait) { + list_add_tail(&qw.qw_entry, &qunit->lq_waiters); + spin_unlock(&qunit_hash_lock); + goto wait_completion; + } else if (qunit && !wait) { + qunit = NULL; + } else if (!qunit && (qunit = alloc_qunit(qctxt, qdata, opc)) != NULL) + insert_qunit_nolock(qctxt, qunit); + + spin_unlock(&qunit_hash_lock); + + if (qunit) { + struct ptlrpc_request *req; + struct qunit_data *reqdata; + struct dqacq_async_args *aa; + int size = sizeof(*reqdata); + + /* master is going to dqacq/dqrel from itself */ + if (qctxt->lqc_handler) { + int rc2; + QDATA_DEBUG(qdata, "local %s.\n", + opc == QUOTA_DQACQ ? "DQACQ" : "DQREL"); + rc = qctxt->lqc_handler(obd, qdata, opc); + rc2 = dqacq_completion(obd, qctxt, qdata, rc, opc); + RETURN((rc && rc != -EDQUOT) ? 
rc : rc2); + } + + /* build dqacq/dqrel request */ + LASSERT(qctxt->lqc_import); + req = ptlrpc_prep_req(qctxt->lqc_import, opc, 1, &size, NULL); + if (!req) { + /* The qunit is already hashed as in-flight. Complete it with + * the error so it is unhashed, any waiters are woken and its + * reference is dropped; otherwise it would stay in flight + * forever. */ + dqacq_completion(obd, qctxt, qdata, -ENOMEM, opc); + RETURN(-ENOMEM); + } + + reqdata = lustre_msg_buf(req->rq_reqmsg, 0, sizeof(*reqdata)); + memcpy(reqdata, qdata, sizeof(*reqdata)); + size = sizeof(*reqdata); + req->rq_replen = lustre_msg_size(1, &size); + + LASSERT(sizeof(*aa) <= sizeof(req->rq_async_args)); + aa = (struct dqacq_async_args *)&req->rq_async_args; + aa->aa_ctxt = qctxt; + aa->aa_qunit = qunit; + + req->rq_interpret_reply = dqacq_interpret; + ptlrpcd_add_req(req); + + QDATA_DEBUG(qdata, "%s scheduled.\n", + opc == QUOTA_DQACQ ? "DQACQ" : "DQREL"); + } +wait_completion: + if (wait && qunit) { + struct qunit_data *p = &qunit->lq_data; + QDATA_DEBUG(p, "wait for dqacq.\n"); + + l_wait_event(qw.qw_waitq, got_qunit(&qw), &lwi); + if (qw.qw_rc == 0) + rc = -EAGAIN; + + QDATA_DEBUG(p, "wait dqacq done. (rc:%d)\n", qw.qw_rc); + } + RETURN(rc); +} + +int +qctxt_adjust_qunit(struct obd_device *obd, struct lustre_quota_ctxt *qctxt, + uid_t uid, gid_t gid, __u32 isblk) +{ + int ret, rc = 0, i = USRQUOTA; + struct qunit_data qdata[MAXQUOTAS]; + ENTRY; + + if (!sb_any_quota_enabled(qctxt->lqc_sb)) + RETURN(0); + + qdata[USRQUOTA].qd_id = uid; + qdata[USRQUOTA].qd_type = USRQUOTA; + qdata[USRQUOTA].qd_isblk = isblk; + qdata[USRQUOTA].qd_count = 0; + qdata[GRPQUOTA].qd_id = gid; + qdata[GRPQUOTA].qd_type = GRPQUOTA; + qdata[GRPQUOTA].qd_isblk = isblk; + qdata[GRPQUOTA].qd_count = 0; + +next: + ret = check_cur_qunit(obd, qctxt, &qdata[i]); + if (ret > 0) { + int opc; + /* need acquire or release */ + opc = ret == 1 ?
QUOTA_DQACQ : QUOTA_DQREL; + ret = schedule_dqacq(obd, qctxt, &qdata[i], opc, 0); + if (!rc) + rc = ret; + } + if (++i < MAXQUOTAS) + goto next; + + RETURN(rc); +} + +int +qctxt_wait_on_dqacq(struct obd_device *obd, struct lustre_quota_ctxt *qctxt, + uid_t uid, gid_t gid, __u32 isblk) +{ + struct qunit_data qdata[MAXQUOTAS]; + int i = USRQUOTA, ret, rc = -EAGAIN; + ENTRY; + + if (!sb_any_quota_enabled(qctxt->lqc_sb)) + RETURN(0); + + qdata[USRQUOTA].qd_id = uid; + qdata[USRQUOTA].qd_type = USRQUOTA; + qdata[USRQUOTA].qd_isblk = isblk; + qdata[USRQUOTA].qd_count = 0; + qdata[GRPQUOTA].qd_id = gid; + qdata[GRPQUOTA].qd_type = GRPQUOTA; + qdata[GRPQUOTA].qd_isblk = isblk; + qdata[GRPQUOTA].qd_count = 0; + +next: + ret = check_cur_qunit(obd, qctxt, &qdata[i]); + if (ret > 0) + rc = schedule_dqacq(obd, qctxt, &qdata[i], QUOTA_DQACQ, 1); + + if (++i < MAXQUOTAS) + goto next; + + RETURN(rc); +} + +int +qctxt_init(struct lustre_quota_ctxt *qctxt, struct super_block *sb, + dqacq_handler_t handler) +{ + int rc = 0; + ENTRY; + + rc = ptlrpcd_addref(); + if (rc) + RETURN(rc); + + qctxt->lqc_handler = handler; + qctxt->lqc_sb = sb; + qctxt->lqc_import = NULL; + qctxt->lqc_flags = 0; + qctxt->lqc_bunit_sz = default_bunit_sz; + qctxt->lqc_btune_sz = default_btune_sz; + qctxt->lqc_iunit_sz = default_iunit_sz; + qctxt->lqc_itune_sz = default_itune_sz; + + RETURN(0); +} + +void qctxt_cleanup(struct lustre_quota_ctxt *qctxt, int force) +{ + struct list_head *pos, *tmp; + struct lustre_qunit *qunit; + int i; + ENTRY; + + ptlrpcd_decref(); + + spin_lock(&qunit_hash_lock); + + for (i = 0; i < NR_DQHASH; i++) { + list_for_each_safe(pos, tmp, &qunit_hash[i]) { + qunit = list_entry(pos, struct lustre_qunit, lq_hash); + LASSERT(qunit->lq_ctxt != qctxt); + } + } + + spin_unlock(&qunit_hash_lock); + EXIT; +} diff --git a/lustre/mds/quota_master.c b/lustre/mds/quota_master.c new file mode 100644 index 0000000..df67fa0 --- /dev/null +++ b/lustre/mds/quota_master.c @@ -0,0 +1,752 @@ +/* -*- 
mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*- + * vim:expandtab:shiftwidth=8:tabstop=8: + * + * lustre/mds/quota_master.c + * Lustre Quota Master request handler + * + * Copyright (c) 2001-2003 Cluster File Systems, Inc. + * Author: Niu YaWei + * + * This file is part of Lustre, http://www.lustre.org. + * + * Lustre is free software; you can redistribute it and/or + * modify it under the terms of version 2 of the GNU General Public + * License as published by the Free Software Foundation. + * + * Lustre is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with Lustre; if not, write to the Free Software + * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. + */ +#ifndef EXPORT_SYMTAB +# define EXPORT_SYMTAB +#endif + +#define DEBUG_SUBSYSTEM S_MDS + +#include +#include +#include +#include +#include +#include +#include +#include + +#include +#include +#include +#include + +#include "mds_internal.h" + +static struct list_head lustre_dquot_hash[NR_DQHASH]; +static spinlock_t dquot_hash_lock = SPIN_LOCK_UNLOCKED; + +kmem_cache_t *lustre_dquot_cachep; + +int lustre_dquot_init(void) +{ + int i; + ENTRY; + + LASSERT(lustre_dquot_cachep == NULL); + lustre_dquot_cachep = kmem_cache_create("lustre_dquot_cache", + sizeof(struct lustre_dquot), + 0, 0, NULL, NULL); + if (!lustre_dquot_cachep) + return (-ENOMEM); + + for (i = 0; i < NR_DQHASH; i++) { + INIT_LIST_HEAD(lustre_dquot_hash + i); + } + RETURN(0); +} + +void lustre_dquot_exit(void) +{ + int i; + ENTRY; + /* FIXME cleanup work ?? 
*/ + + for (i = 0; i < NR_DQHASH; i++) { + LASSERT(list_empty(lustre_dquot_hash + i)); + } + if (lustre_dquot_cachep) { + LASSERTF(kmem_cache_destroy(lustre_dquot_cachep) == 0, + "Cannot destroy lustre_dquot_cache\n"); + lustre_dquot_cachep = NULL; + } + EXIT; +} + +static inline int const dquot_hashfn(struct lustre_quota_info *info, + unsigned int id, int type) +{ + unsigned long tmp = ((unsigned long)info >> L1_CACHE_SHIFT) ^ id; + tmp = (tmp * (MAXQUOTAS - type)) % NR_DQHASH; + return tmp; +} + +static struct lustre_dquot *find_dquot(int hashent, + struct lustre_quota_info *lqi, qid_t id, + int type) +{ + struct list_head *head; + struct lustre_dquot *dquot; + ENTRY; + + for (head = lustre_dquot_hash[hashent].next; + head != lustre_dquot_hash + hashent; head = head->next) { + dquot = list_entry(head, struct lustre_dquot, dq_hash); + if (dquot->dq_info == lqi && + dquot->dq_id == id && dquot->dq_type == type) + RETURN(dquot); + } + RETURN(NULL); +} + +static struct lustre_dquot *alloc_dquot(struct lustre_quota_info *lqi, + qid_t id, int type) +{ + struct lustre_dquot *dquot = NULL; + ENTRY; + + OBD_SLAB_ALLOC(dquot, lustre_dquot_cachep, SLAB_NOFS, sizeof(*dquot)); + if (dquot == NULL) + RETURN(NULL); + + INIT_LIST_HEAD(&dquot->dq_hash); + INIT_LIST_HEAD(&dquot->dq_unused); + sema_init(&dquot->dq_sem, 1); + atomic_set(&dquot->dq_refcnt, 1); + dquot->dq_info = lqi; + dquot->dq_id = id; + dquot->dq_type = type; + + RETURN(dquot); +} + +static void free_dquot(struct lustre_dquot *dquot) +{ + OBD_SLAB_FREE(dquot, lustre_dquot_cachep, sizeof(*dquot)); +} + +static void insert_dquot_nolock(struct lustre_dquot *dquot) +{ + struct list_head *head = lustre_dquot_hash + + dquot_hashfn(dquot->dq_info, dquot->dq_id, dquot->dq_type); + list_add(&dquot->dq_hash, head); +} + +static void remove_dquot_nolock(struct lustre_dquot *dquot) +{ + LASSERT(!list_empty(&dquot->dq_hash)); + list_del_init(&dquot->dq_hash); +} + +static void lustre_dqput(struct lustre_dquot *dquot) +{ + 
ENTRY; + spin_lock(&dquot_hash_lock); + LASSERT(atomic_read(&dquot->dq_refcnt)); + if (atomic_dec_and_test(&dquot->dq_refcnt)) { + remove_dquot_nolock(dquot); + free_dquot(dquot); + } + spin_unlock(&dquot_hash_lock); + EXIT; +} + +#define DQUOT_DEBUG(dquot, fmt, arg...) \ + CDEBUG(D_QUOTA, "refcnt(%u) id(%u) type(%u) off("LPX64") flags(%lu) " \ + "bhardlimit(%u) curspace("LPX64") ihardlimit(%u) " \ + "curinodes(%u): " fmt, atomic_read(&dquot->dq_refcnt), \ + dquot->dq_id, dquot->dq_type, dquot->dq_off, dquot->dq_flags, \ + dquot->dq_dqb.dqb_bhardlimit, dquot->dq_dqb.dqb_curspace, \ + dquot->dq_dqb.dqb_ihardlimit, dquot->dq_dqb.dqb_curinodes, \ + ## arg); \ + +#define QINFO_DEBUG(qinfo, fmt, arg...) \ + CDEBUG(D_QUOTA, "files (%p/%p) flags(%lu/%lu) blocks(%u/%u) " \ + "free_blk(/%u/%u) free_entry(%u/%u): " fmt, \ + qinfo->qi_files[0], qinfo->qi_files[1], \ + qinfo->qi_info[0].dqi_flags, qinfo->qi_info[1].dqi_flags, \ + qinfo->qi_info[0].dqi_blocks, qinfo->qi_info[1].dqi_blocks, \ + qinfo->qi_info[0].dqi_free_blk, qinfo->qi_info[1].dqi_free_blk,\ + qinfo->qi_info[0].dqi_free_entry, \ + qinfo->qi_info[1].dqi_free_entry, ## arg); + +static struct lustre_dquot *lustre_dqget(struct obd_device *obd, + struct lustre_quota_info *lqi, + qid_t id, int type) +{ + unsigned int hashent = dquot_hashfn(lqi, id, type); + struct lustre_dquot *dquot = NULL; + int read = 0; + ENTRY; + + spin_lock(&dquot_hash_lock); + if ((dquot = find_dquot(hashent, lqi, id, type)) != NULL) { + atomic_inc(&dquot->dq_refcnt); + } else { + dquot = alloc_dquot(lqi, id, type); + if (dquot) { + insert_dquot_nolock(dquot); + read = 1; + } + } + spin_unlock(&dquot_hash_lock); + + if (dquot == NULL) + RETURN(ERR_PTR(-ENOMEM)); + + if (read) { + int rc = 0; + + down(&dquot->dq_info->qi_sem); + down(&dquot->dq_sem); + rc = fsfilt_dquot(obd, dquot, QFILE_RD_DQUOT); + up(&dquot->dq_sem); + up(&dquot->dq_info->qi_sem); + if (rc) { + CERROR("can't read dquot from admin qutoafile! 
" + "(rc:%d)\n", rc); + lustre_dqput(dquot); + RETURN(ERR_PTR(rc)); + } + } + RETURN(dquot); +} + +int dqacq_handler(struct obd_device *obd, struct qunit_data *qdata, int opc) +{ + struct mds_obd *mds = &obd->u.mds; + struct lustre_quota_info *info = &mds->mds_quota_info; + struct lustre_dquot *dquot = NULL; + __u64 *usage = NULL; + __u32 *limit = NULL; + int rc = 0; + ENTRY; + + dquot = lustre_dqget(obd, info, qdata->qd_id, qdata->qd_type); + if (IS_ERR(dquot)) + RETURN(PTR_ERR(dquot)); + + DQUOT_DEBUG(dquot, "get dquot in dqacq_handler\n"); + QINFO_DEBUG(dquot->dq_info, "get dquot in dqadq_handler\n"); + + down(&dquot->dq_info->qi_sem); + down(&dquot->dq_sem); + + if (qdata->qd_isblk) { + usage = &dquot->dq_dqb.dqb_curspace; + limit = &dquot->dq_dqb.dqb_bhardlimit; + } else { + usage = (__u64 *) & dquot->dq_dqb.dqb_curinodes; + limit = &dquot->dq_dqb.dqb_ihardlimit; + } + + /* if the quota limit in admin quotafile is zero, we just inform + * slave to clear quota limit with zero qd_count */ + if (*limit == 0) { + qdata->qd_count = 0; + GOTO(out, rc); + } + if (opc == QUOTA_DQACQ) { + if (QUSG(*usage + qdata->qd_count, qdata->qd_isblk) > *limit) + GOTO(out, rc = -EDQUOT); + else + *usage += qdata->qd_count; + } else if (opc == QUOTA_DQREL) { + LASSERT(*usage - qdata->qd_count >= 0); + *usage -= qdata->qd_count; + } else { + LBUG(); + } + + rc = fsfilt_dquot(obd, dquot, QFILE_WR_DQUOT); +out: + up(&dquot->dq_sem); + up(&dquot->dq_info->qi_sem); + lustre_dqput(dquot); + RETURN(rc); +} + +void mds_adjust_qunit(struct obd_device *obd, uid_t cuid, gid_t cgid, + uid_t puid, gid_t pgid, int rc) +{ + struct mds_obd *mds = &obd->u.mds; + struct lustre_quota_ctxt *qctxt = &mds->mds_quota_ctxt; + ENTRY; + + if (rc && rc != -EDQUOT) { + EXIT; + return; + } + /* dqacq/dqrel file quota on owner of child */ + rc = qctxt_adjust_qunit(obd, qctxt, cuid, cgid, 0); + if (rc) + CERROR("error mds adjust child qunit! 
(rc:%d)\n", rc); + /* dqacq/dqrel block quota on owner of parent directory */ + rc = qctxt_adjust_qunit(obd, qctxt, puid, pgid, 1); + if (rc) + CERROR("error mds adjust parent qunit! (rc:%d)\n", rc); + EXIT; +} + +int init_admin_quotafiles(struct obd_device *obd, struct obd_quotactl *oqctl) +{ + struct mds_obd *mds = &obd->u.mds; + struct lustre_quota_info *qinfo = &mds->mds_quota_info; + const char *quotafiles[] = LUSTRE_ADMIN_QUOTAFILES; + struct obd_run_ctxt saved; + char name[64]; + int i, rc = 0; + struct dentry *dparent = mds->mds_objects_dir; + struct inode *iparent = dparent->d_inode; + ENTRY; + + LASSERT(iparent); + push_ctxt(&saved, &obd->obd_ctxt, NULL); + + down(&qinfo->qi_sem); + for (i = 0; i < MAXQUOTAS; i++) { + struct dentry *de = NULL; + struct file *fp = NULL; + + if (!Q_TYPESET(oqctl, i)) + continue; + + /* quota file has been opened ? */ + if (qinfo->qi_files[i]) { + CWARN("init %s admin quotafile while quota on.\n", + i == USRQUOTA ? "user" : "group"); + continue; + } + + /* lookup quota file */ + rc = 0; + down(&iparent->i_sem); + + de = lookup_one_len(quotafiles[i], dparent, + strlen(quotafiles[i])); + if (IS_ERR(de) || de->d_inode == NULL) + rc = IS_ERR(de) ? PTR_ERR(de) : -ENOENT; + if (!IS_ERR(de)) + dput(de); + up(&iparent->i_sem); + + if (rc && rc != -ENOENT) { + CERROR("error lookup quotafile %s! (rc:%d)\n", + name, rc); + break; + } else if (!rc) { + continue; + } + + sprintf(name, "OBJECTS/%s", quotafiles[i]); + + LASSERT(rc == -ENOENT); + /* create quota file */ + fp = filp_open(name, O_CREAT | O_EXCL, 0644); + if (IS_ERR(fp)) { + rc = PTR_ERR(fp); + CERROR("error creating admin quotafile %s (rc:%d)\n", + name, rc); + break; + } + + qinfo->qi_files[i] = fp; + rc = fsfilt_quotainfo(obd, qinfo, i, QFILE_INIT_INFO); + filp_close(fp, 0); + qinfo->qi_files[i] = NULL; + + if (rc) { + CERROR("error init %s admin quotafile! (rc:%d)\n", + i == USRQUOTA ? 
"user" : "group", rc); + break; + } + } + up(&qinfo->qi_sem); + + pop_ctxt(&saved, &obd->obd_ctxt, NULL); + RETURN(rc); +} + +int mds_quota_on(struct obd_device *obd, struct obd_quotactl *oqctl) +{ + struct mds_obd *mds = &obd->u.mds; + struct lustre_quota_info *qinfo = &mds->mds_quota_info; + const char *quotafiles[] = LUSTRE_ADMIN_QUOTAFILES; + struct obd_run_ctxt saved; + char name[64]; + int i, rc = 0; + struct inode *iparent = mds->mds_objects_dir->d_inode; + ENTRY; + + LASSERT(iparent); + push_ctxt(&saved, &obd->obd_ctxt, NULL); + + down(&qinfo->qi_sem); + /* open admin quota files and read quotafile info */ + for (i = 0; i < MAXQUOTAS; i++) { + struct file *fp = NULL; + + if (!Q_TYPESET(oqctl, i)) + continue; + + sprintf(name, "OBJECTS/%s", quotafiles[i]); + + if (qinfo->qi_files[i] != NULL) { + rc = -EBUSY; + break; + } + + fp = filp_open(name, O_RDWR | O_EXCL, 0644); + if (IS_ERR(fp)) { + rc = PTR_ERR(fp); + CERROR("error open %s! (rc:%d)\n", name, rc); + break; + } + qinfo->qi_files[i] = fp; + + rc = fsfilt_quotainfo(obd, qinfo, i, QFILE_RD_INFO); + if (rc) { + CERROR("error read quotainfo of %s! 
(rc:%d)\n", + name, rc); + break; + } + } + up(&qinfo->qi_sem); + + pop_ctxt(&saved, &obd->obd_ctxt, NULL); + + if (rc && rc != -EBUSY) { + down(&qinfo->qi_sem); + for (i = 0; i < MAXQUOTAS; i++) { + if (!Q_TYPESET(oqctl, i)) + continue; + if (qinfo->qi_files[i]) + filp_close(qinfo->qi_files[i], 0); + qinfo->qi_files[i] = NULL; + } + up(&qinfo->qi_sem); + } + RETURN(rc); +} + +int mds_quota_off(struct obd_device *obd, struct obd_quotactl *oqctl) +{ + struct mds_obd *mds = &obd->u.mds; + struct lustre_quota_info *qinfo = &mds->mds_quota_info; + int i, rc = 0; + ENTRY; + + down(&qinfo->qi_sem); + /* close admin quota files */ + for (i = 0; i < MAXQUOTAS; i++) { + if (!Q_TYPESET(oqctl, i)) + continue; + + if (qinfo->qi_files[i] == NULL) { + rc = -ESRCH; + continue; + } + filp_close(qinfo->qi_files[i], 0); + qinfo->qi_files[i] = NULL; + } + up(&qinfo->qi_sem); + + RETURN(rc); +} + +int mds_set_dqinfo(struct obd_device *obd, struct obd_quotactl *oqctl) +{ + struct mds_obd *mds = &obd->u.mds; + struct lustre_quota_info *qinfo = &mds->mds_quota_info; + struct obd_dqinfo *dqinfo = &oqctl->qc_dqinfo; + int rc = 0; + ENTRY; + + if (qinfo->qi_files[oqctl->qc_type] == NULL) + RETURN(-ESRCH); + + down(&qinfo->qi_sem); + qinfo->qi_info[oqctl->qc_type].dqi_bgrace = dqinfo->dqi_bgrace; + qinfo->qi_info[oqctl->qc_type].dqi_igrace = dqinfo->dqi_igrace; + qinfo->qi_info[oqctl->qc_type].dqi_flags = dqinfo->dqi_flags; + + rc = fsfilt_quotainfo(obd, qinfo, oqctl->qc_type, QFILE_WR_INFO); + up(&qinfo->qi_sem); + + RETURN(rc); +} + +int mds_get_dqinfo(struct obd_device *obd, struct obd_quotactl *oqctl) +{ + struct mds_obd *mds = &obd->u.mds; + struct lustre_quota_info *qinfo = &mds->mds_quota_info; + struct obd_dqinfo *dqinfo = &oqctl->qc_dqinfo; + ENTRY; + + if (qinfo->qi_files[oqctl->qc_type] == NULL) + RETURN(-ESRCH); + + down(&qinfo->qi_sem); + dqinfo->dqi_bgrace = qinfo->qi_info[oqctl->qc_type].dqi_bgrace; + dqinfo->dqi_igrace = qinfo->qi_info[oqctl->qc_type].dqi_igrace; + 
dqinfo->dqi_flags = qinfo->qi_info[oqctl->qc_type].dqi_flags; + up(&qinfo->qi_sem); + + RETURN(0); +} + +static int mds_init_slave_ilimits(struct obd_device *obd, + struct obd_quotactl *oqctl) +{ + /* XXX: for file limits only adjust local now */ + struct mds_obd *mds = &obd->u.mds; + unsigned int uid = 0, gid = 0; + struct obd_quotactl *ioqc; + int rc; + ENTRY; + + /* if we are going to set zero limit, needn't init slaves */ + if (!oqctl->qc_dqblk.dqb_ihardlimit) + RETURN(0); + + OBD_ALLOC(ioqc, sizeof(*ioqc)); + if (!ioqc) + RETURN(-ENOMEM); + + ioqc->qc_cmd = Q_SETQUOTA; + ioqc->qc_id = oqctl->qc_id; + ioqc->qc_type = oqctl->qc_type; + ioqc->qc_dqblk.dqb_valid = QIF_ILIMITS; + ioqc->qc_dqblk.dqb_ihardlimit = MIN_QLIMIT; + + /* set local limit to MIN_QLIMIT */ + rc = fsfilt_quotactl(obd, mds->mds_sb, ioqc); + if (rc) + GOTO(out, rc); + + /* trigger local qunit pre-acquire */ + if (oqctl->qc_type == USRQUOTA) + uid = oqctl->qc_id; + else + gid = oqctl->qc_id; + + rc = qctxt_adjust_qunit(obd, &mds->mds_quota_ctxt, uid, gid, 0); + if (rc) { + CERROR("error mds adjust local file quota! 
(rc:%d)\n", rc); + GOTO(out, rc); + } + /* FIXME initialize all slaves in CMD */ +out: + OBD_FREE(ioqc, sizeof(*ioqc)); + RETURN(rc); +} + +static int mds_init_slave_blimits(struct obd_device *obd, + struct obd_quotactl *oqctl) +{ + struct mds_obd *mds = &obd->u.mds; + struct obd_quotactl *ioqc; + unsigned int uid = 0, gid = 0; + int rc; + ENTRY; + + /* if we are going to set zero limit, needn't init slaves */ + if (!oqctl->qc_dqblk.dqb_bhardlimit) + RETURN(0); + + OBD_ALLOC(ioqc, sizeof(*ioqc)); + if (!ioqc) + RETURN(-ENOMEM); + + ioqc->qc_cmd = Q_SETQUOTA; + ioqc->qc_id = oqctl->qc_id; + ioqc->qc_type = oqctl->qc_type; + ioqc->qc_dqblk.dqb_valid = QIF_BLIMITS; + ioqc->qc_dqblk.dqb_bhardlimit = MIN_QLIMIT; + + /* set local limit to MIN_QLIMIT */ + rc = fsfilt_quotactl(obd, mds->mds_sb, ioqc); + if (rc) + GOTO(out, rc); + + /* trigger local qunit pre-acquire */ + if (oqctl->qc_type == USRQUOTA) + uid = oqctl->qc_id; + else + gid = oqctl->qc_id; + + rc = qctxt_adjust_qunit(obd, &mds->mds_quota_ctxt, uid, gid, 1); + if (rc) { + CERROR("error mds adjust local block quota! 
(rc:%d)\n", rc); + GOTO(out, rc); + } + + /* initialize all slave's limit */ + ioqc->qc_cmd = Q_INITQUOTA; + rc = obd_quotactl(mds->mds_osc_exp, ioqc); +out: + OBD_FREE(ioqc, sizeof(*ioqc)); + RETURN(rc); +} + +int mds_set_dqblk(struct obd_device *obd, struct obd_quotactl *oqctl) +{ + struct mds_obd *mds = &obd->u.mds; + struct lustre_quota_info *qinfo = &mds->mds_quota_info; + __u32 ihardlimit, isoftlimit, bhardlimit, bsoftlimit; + time_t btime, itime; + struct lustre_dquot *dquot; + struct obd_dqblk *dqblk = &oqctl->qc_dqblk; + int rc = 0; + ENTRY; + + if (qinfo->qi_files[oqctl->qc_type] == NULL) + RETURN(-ESRCH); + + dquot = lustre_dqget(obd, qinfo, oqctl->qc_id, oqctl->qc_type); + if (IS_ERR(dquot)) + RETURN(PTR_ERR(dquot)); + DQUOT_DEBUG(dquot, "get dquot in mds_set_blk\n"); + QINFO_DEBUG(dquot->dq_info, "get dquot in mds_set_blk\n"); + + down(&dquot->dq_info->qi_sem); + down(&dquot->dq_sem); + + ihardlimit = dquot->dq_dqb.dqb_ihardlimit; + isoftlimit = dquot->dq_dqb.dqb_isoftlimit; + bhardlimit = dquot->dq_dqb.dqb_bhardlimit; + bsoftlimit = dquot->dq_dqb.dqb_bsoftlimit; + btime = dquot->dq_dqb.dqb_btime; + itime = dquot->dq_dqb.dqb_itime; + + if (dqblk->dqb_valid & QIF_BLIMITS) { + dquot->dq_dqb.dqb_bhardlimit = dqblk->dqb_bhardlimit; + dquot->dq_dqb.dqb_bsoftlimit = dqblk->dqb_bsoftlimit; + /* clear usage (limit pool) */ + if (dquot->dq_dqb.dqb_bhardlimit == 0) + dquot->dq_dqb.dqb_curspace = 0; + } + + if (dqblk->dqb_valid & QIF_ILIMITS) { + dquot->dq_dqb.dqb_ihardlimit = dqblk->dqb_ihardlimit; + dquot->dq_dqb.dqb_isoftlimit = dqblk->dqb_isoftlimit; + /* clear usage (limit pool) */ + if (dquot->dq_dqb.dqb_ihardlimit == 0) + dquot->dq_dqb.dqb_curinodes = 0; + } + + if (dqblk->dqb_valid & QIF_BTIME) + dquot->dq_dqb.dqb_btime = dqblk->dqb_btime; + + if (dqblk->dqb_valid & QIF_ITIME) + dquot->dq_dqb.dqb_itime = dqblk->dqb_itime; + + rc = fsfilt_dquot(obd, dquot, QFILE_WR_DQUOT); + + up(&dquot->dq_sem); + up(&dquot->dq_info->qi_sem); + + if (rc) + GOTO(out, rc); 
+ + if (dqblk->dqb_valid & QIF_ILIMITS && !ihardlimit) { + rc = mds_init_slave_ilimits(obd, oqctl); + if (rc) { + CERROR("init slave ilimits failed! (rc:%d)\n", rc); + GOTO(revoke_out, rc); + } + } + + if (dqblk->dqb_valid & QIF_BLIMITS && !bhardlimit) { + rc = mds_init_slave_blimits(obd, oqctl); + if (rc) { + CERROR("init slave blimits failed! (rc:%d)\n", rc); + GOTO(revoke_out, rc); + } + } + +revoke_out: + if (rc) { + /* cancel previous setting */ + down(&dquot->dq_info->qi_sem); + down(&dquot->dq_sem); + dquot->dq_dqb.dqb_ihardlimit = ihardlimit; + dquot->dq_dqb.dqb_isoftlimit = isoftlimit; + dquot->dq_dqb.dqb_bhardlimit = bhardlimit; + dquot->dq_dqb.dqb_bsoftlimit = bsoftlimit; + dquot->dq_dqb.dqb_btime = btime; + dquot->dq_dqb.dqb_itime = itime; + fsfilt_dquot(obd, dquot, QFILE_WR_DQUOT); + up(&dquot->dq_sem); + up(&dquot->dq_info->qi_sem); + } +out: + lustre_dqput(dquot); + RETURN(rc); +} + +static int mds_get_space(struct obd_device *obd, struct obd_quotactl *oqctl) +{ + struct obd_quotactl *soqc; + int rc; + + OBD_ALLOC(soqc, sizeof(*soqc)); + if (!soqc) + RETURN(-ENOMEM); + + soqc->qc_cmd = oqctl->qc_cmd; + soqc->qc_id = oqctl->qc_id; + soqc->qc_type = oqctl->qc_type; + + rc = obd_quotactl(obd->u.mds.mds_osc_exp, soqc); + + oqctl->qc_dqblk.dqb_curspace = soqc->qc_dqblk.dqb_curspace; + + OBD_FREE(soqc, sizeof(*soqc)); + return rc; +} + +int mds_get_dqblk(struct obd_device *obd, struct obd_quotactl *oqctl) +{ + struct mds_obd *mds = &obd->u.mds; + struct lustre_quota_info *qinfo = &mds->mds_quota_info; + struct lustre_dquot *dquot; + struct obd_dqblk *dqblk = &oqctl->qc_dqblk; + int rc; + ENTRY; + + if (qinfo->qi_files[oqctl->qc_type] == NULL) + RETURN(-ESRCH); + + dquot = lustre_dqget(obd, qinfo, oqctl->qc_id, oqctl->qc_type); + if (IS_ERR(dquot)) + RETURN(PTR_ERR(dquot)); + + down(&dquot->dq_sem); + dqblk->dqb_ihardlimit = dquot->dq_dqb.dqb_ihardlimit; + dqblk->dqb_isoftlimit = dquot->dq_dqb.dqb_isoftlimit; + dqblk->dqb_bhardlimit = 
dquot->dq_dqb.dqb_bhardlimit; + dqblk->dqb_bsoftlimit = dquot->dq_dqb.dqb_bsoftlimit; + dqblk->dqb_btime = dquot->dq_dqb.dqb_btime; + dqblk->dqb_itime = dquot->dq_dqb.dqb_itime; + up(&dquot->dq_sem); + + /* the usages in admin quota file is inaccurate */ + dqblk->dqb_curinodes = 0; + dqblk->dqb_curspace = 0; + rc = mds_get_space(obd, oqctl); + + lustre_dqput(dquot); + RETURN(rc); +} diff --git a/lustre/obdclass/class_obd.c b/lustre/obdclass/class_obd.c index a99c3f2..b3e0607 100644 --- a/lustre/obdclass/class_obd.c +++ b/lustre/obdclass/class_obd.c @@ -395,6 +395,9 @@ void *obd_psdev = NULL; EXPORT_SYMBOL(obd_dev); EXPORT_SYMBOL(obdo_cachep); +EXPORT_SYMBOL(qunit_cachep); +EXPORT_SYMBOL(qunit_hash_lock); +EXPORT_SYMBOL(qunit_hash); EXPORT_SYMBOL(obd_fail_loc); EXPORT_SYMBOL(ll_set_rdonly); EXPORT_SYMBOL(ll_clear_rdonly); @@ -418,6 +421,7 @@ EXPORT_SYMBOL(class_name2obd); EXPORT_SYMBOL(class_uuid2dev); EXPORT_SYMBOL(class_uuid2obd); EXPORT_SYMBOL(class_find_client_obd); +EXPORT_SYMBOL(class_find_client_notype); EXPORT_SYMBOL(class_devices_in_group); EXPORT_SYMBOL(__class_export_put); EXPORT_SYMBOL(class_new_export); diff --git a/lustre/obdclass/genops.c b/lustre/obdclass/genops.c index d5d4334..db8500f 100644 --- a/lustre/obdclass/genops.c +++ b/lustre/obdclass/genops.c @@ -27,21 +27,29 @@ #include /* for request_module() */ #include #include +#include +#include #include #include #include +#include #else #include #include #include #endif #include +#include extern struct list_head obd_types; static spinlock_t obd_types_lock = SPIN_LOCK_UNLOCKED; kmem_cache_t *obdo_cachep = NULL; kmem_cache_t *import_cachep = NULL; +kmem_cache_t *qunit_cachep = NULL; +struct list_head qunit_hash[NR_DQHASH]; +spinlock_t qunit_hash_lock = SPIN_LOCK_UNLOCKED; + int (*ptlrpc_put_connection_superhack)(struct ptlrpc_connection *c); void (*ptlrpc_abort_inflight_superhack)(struct obd_import *imp); @@ -309,6 +317,18 @@ struct obd_device * class_find_client_obd(struct obd_uuid *tgt_uuid, return 
NULL; } +struct obd_device *class_find_client_notype(struct obd_uuid *tgt_uuid, + struct obd_uuid *grp_uuid) +{ + struct obd_device *obd; + + obd = class_find_client_obd(tgt_uuid, LUSTRE_MDC_NAME, NULL); + if (!obd) + obd = class_find_client_obd(tgt_uuid, LUSTRE_OSC_NAME, + grp_uuid); + return obd; +} + /* Iterate the obd_device list looking devices have grp_uuid. Start searching at *next, and if a device is found, the next index to look at is saved in *next. If next is NULL, then the first matching device @@ -341,6 +361,23 @@ struct obd_device * class_devices_in_group(struct obd_uuid *grp_uuid, int *next) return NULL; } +static void obd_cleanup_qunit_cache(void) +{ + int i; + ENTRY; + + spin_lock(&qunit_hash_lock); + for (i = 0; i < NR_DQHASH; i++) + LASSERT(list_empty(qunit_hash + i)); + spin_unlock(&qunit_hash_lock); + + if (qunit_cachep) { + LASSERTF(kmem_cache_destroy(qunit_cachep) == 0, + "Cannot destroy ll_qunit_cache\n"); + qunit_cachep = NULL; + } + EXIT; +} void obd_cleanup_caches(void) { @@ -355,12 +392,34 @@ void obd_cleanup_caches(void) "Cannot destory ll_import_cache\n"); import_cachep = NULL; } + obd_cleanup_qunit_cache(); EXIT; } +static int obd_init_qunit_cache(void) +{ + int i; + ENTRY; + + LASSERT(qunit_cachep == NULL); + qunit_cachep = kmem_cache_create("ll_qunit_cache", + sizeof(struct lustre_qunit), + 0, 0, NULL, NULL); + if (!qunit_cachep) + RETURN(-ENOMEM); + + spin_lock(&qunit_hash_lock); + for (i = 0; i < NR_DQHASH; i++) + INIT_LIST_HEAD(qunit_hash + i); + spin_unlock(&qunit_hash_lock); + RETURN(0); +} + int obd_init_caches(void) { + int rc = 0; ENTRY; + LASSERT(obdo_cachep == NULL); obdo_cachep = kmem_cache_create("ll_obdo_cache", sizeof(struct obdo), 0, 0, NULL, NULL); @@ -374,6 +433,10 @@ int obd_init_caches(void) if (!import_cachep) GOTO(out, -ENOMEM); + rc = obd_init_qunit_cache(); + if (rc) + GOTO(out, rc); + RETURN(0); out: obd_cleanup_caches(); diff --git a/lustre/obdclass/llog_ioctl.c b/lustre/obdclass/llog_ioctl.c index 
f364a21..3c05a97 100644 --- a/lustre/obdclass/llog_ioctl.c +++ b/lustre/obdclass/llog_ioctl.c @@ -110,6 +110,7 @@ static int llog_check_cb(struct llog_handle *handle, struct llog_rec_hdr *rec, case OST_SZ_REC: case OST_RAID1_REC: case MDS_UNLINK_REC: + case MDS_SETATTR_REC: case OBD_CFG_REC: case PTL_CFG_REC: case LLOG_HDR_MAGIC: { diff --git a/lustre/obdclass/llog_lvfs.c b/lustre/obdclass/llog_lvfs.c index 2cae0a7..6c45e8d 100644 --- a/lustre/obdclass/llog_lvfs.c +++ b/lustre/obdclass/llog_lvfs.c @@ -304,8 +304,9 @@ static int llog_lvfs_write_rec(struct llog_handle *loghandle, if (rc == 0 && reccookie) { reccookie->lgc_lgl = loghandle->lgh_id; reccookie->lgc_index = index; - if (rec->lrh_type == MDS_UNLINK_REC) - reccookie->lgc_subsys = LLOG_UNLINK_ORIG_CTXT; + if ((rec->lrh_type == MDS_UNLINK_REC) || + (rec->lrh_type == MDS_SETATTR_REC)) + reccookie->lgc_subsys = LLOG_MDS_OST_ORIG_CTXT; else if (rec->lrh_type == OST_SZ_REC) reccookie->lgc_subsys = LLOG_SIZE_ORIG_CTXT; else if (rec->lrh_type == OST_RAID1_REC) diff --git a/lustre/obdclass/llog_obd.c b/lustre/obdclass/llog_obd.c index c22316c..0796c50 100644 --- a/lustre/obdclass/llog_obd.c +++ b/lustre/obdclass/llog_obd.c @@ -92,7 +92,7 @@ EXPORT_SYMBOL(llog_sync); int llog_add(struct llog_ctxt *ctxt, struct llog_rec_hdr *rec, struct lov_stripe_md *lsm, struct llog_cookie *logcookies, - int numcookies) + int numcookies, llog_fill_rec_cb_t fill_cb) { int rc; ENTRY; @@ -104,7 +104,7 @@ int llog_add(struct llog_ctxt *ctxt, struct llog_rec_hdr *rec, CTXT_CHECK_OP(ctxt, add, -EOPNOTSUPP); - rc = CTXTP(ctxt, add)(ctxt, rec, lsm, logcookies, numcookies); + rc = CTXTP(ctxt, add)(ctxt, rec, lsm, logcookies, numcookies, fill_cb); RETURN(rc); } EXPORT_SYMBOL(llog_add); @@ -269,7 +269,8 @@ EXPORT_SYMBOL(llog_obd_origin_cleanup); /* add for obdfilter/sz and mds/unlink */ int llog_obd_origin_add(struct llog_ctxt *ctxt, struct llog_rec_hdr *rec, struct lov_stripe_md *lsm, - struct llog_cookie *logcookies, int numcookies) + struct 
llog_cookie *logcookies, int numcookies, + llog_fill_rec_cb_t fill_cb) { struct llog_handle *cathandle; int rc; diff --git a/lustre/obdclass/llog_swab.c b/lustre/obdclass/llog_swab.c index 3a2ae51..5b13389 100644 --- a/lustre/obdclass/llog_swab.c +++ b/lustre/obdclass/llog_swab.c @@ -107,6 +107,17 @@ void lustre_swab_llog_rec(struct llog_rec_hdr *rec, struct llog_rec_tail *tail) break; } + case MDS_SETATTR_REC: { + struct llog_setattr_rec *lsr = (struct llog_setattr_rec *)rec; + + __swab64s(&lsr->lsr_oid); + __swab32s(&lsr->lsr_ogen); + __swab32s(&lsr->lsr_uid); + __swab32s(&lsr->lsr_gid); + + break; + } + case OBD_CFG_REC: case PTL_CFG_REC: /* these are swabbed as they are consumed */ diff --git a/lustre/obdclass/lprocfs_status.c b/lustre/obdclass/lprocfs_status.c index 398b462..1b663e9 100644 --- a/lustre/obdclass/lprocfs_status.c +++ b/lustre/obdclass/lprocfs_status.c @@ -607,7 +607,7 @@ int lprocfs_alloc_obd_stats(struct obd_device *obd, unsigned num_private_stats) LASSERT(obd->obd_proc_entry != NULL); LASSERT(obd->obd_cntr_base == 0); - num_stats = 1 + OBD_COUNTER_OFFSET(notify) + + num_stats = 1 + OBD_COUNTER_OFFSET(quotactl) + num_private_stats; stats = lprocfs_alloc_stats(num_stats); if (stats == NULL) @@ -634,6 +634,7 @@ int lprocfs_alloc_obd_stats(struct obd_device *obd, unsigned num_private_stats) LPROCFS_OBD_OP_INIT(num_private_stats, stats, create); LPROCFS_OBD_OP_INIT(num_private_stats, stats, destroy); LPROCFS_OBD_OP_INIT(num_private_stats, stats, setattr); + LPROCFS_OBD_OP_INIT(num_private_stats, stats, setattr_async); LPROCFS_OBD_OP_INIT(num_private_stats, stats, getattr); LPROCFS_OBD_OP_INIT(num_private_stats, stats, getattr_async); LPROCFS_OBD_OP_INIT(num_private_stats, stats, brw); @@ -667,6 +668,8 @@ int lprocfs_alloc_obd_stats(struct obd_device *obd, unsigned num_private_stats) LPROCFS_OBD_OP_INIT(num_private_stats, stats, unpin); LPROCFS_OBD_OP_INIT(num_private_stats, stats, import_event); LPROCFS_OBD_OP_INIT(num_private_stats, stats, 
notify); + LPROCFS_OBD_OP_INIT(num_private_stats, stats, quotacheck); + LPROCFS_OBD_OP_INIT(num_private_stats, stats, quotactl); for (i = num_private_stats; i < num_stats; i++) { /* If this LBUGs, it is likely that an obd diff --git a/lustre/obdfilter/Makefile.in b/lustre/obdfilter/Makefile.in index 3e0871a..eef89e8 100644 --- a/lustre/obdfilter/Makefile.in +++ b/lustre/obdfilter/Makefile.in @@ -1,4 +1,5 @@ MODULES := obdfilter +MDS := @top_srcdir@/lustre/mds/ obdfilter-objs := filter.o filter_io.o filter_log.o filter_san.o obdfilter-objs += lproc_obdfilter.o filter_lvb.o @@ -6,6 +7,7 @@ ifeq ($(PATCHLEVEL),4) obdfilter-objs += filter_io_24.o else obdfilter-objs += filter_io_26.o +obdfilter-objs += $(MDS)quota_context.o endif # PATCHLEVEL @INCLUDE_RULES@ diff --git a/lustre/obdfilter/filter.c b/lustre/obdfilter/filter.c index a870d67..e93cc25 100644 --- a/lustre/obdfilter/filter.c +++ b/lustre/obdfilter/filter.c @@ -41,6 +41,8 @@ #include #include #include +#include +#include #if (LINUX_VERSION_CODE > KERNEL_VERSION(2,5,0)) # include # include @@ -54,6 +56,7 @@ #include #include #include +#include #include "filter_internal.h" @@ -1049,11 +1052,9 @@ static int filter_destroy_internal(struct obd_device *obd, obd_id objid, } rc = vfs_unlink(dparent->d_inode, dchild); - if (rc) CERROR("error unlinking objid %.*s: rc %d\n", dchild->d_name.len, dchild->d_name.name, rc); - RETURN(rc); } @@ -1261,6 +1262,8 @@ int filter_common_setup(struct obd_device *obd, obd_count len, void *buf, spin_lock_init(&filter->fo_w_disk_iosize.oh_lock); filter->fo_readcache_max_filesize = FILTER_MAX_CACHE_SIZE; + atomic_set(&filter->fo_quotachecking, 1); + sprintf(ns_name, "filter-%s", obd->obd_uuid.uuid); obd->obd_namespace = ldlm_namespace_new(ns_name, LDLM_NAMESPACE_SERVER); if (obd->obd_namespace == NULL) @@ -1300,6 +1303,13 @@ int filter_common_setup(struct obd_device *obd, obd_count len, void *buf, obd->obd_replayable ? 
"enabled" : "disabled"); } + rc = qctxt_init(&filter->fo_quota_ctxt, filter->fo_sb, NULL); + if (rc) { + CERROR("initialize quota context failed! (rc:%d)\n", rc); + qctxt_cleanup(&filter->fo_quota_ctxt, 0); + GOTO(err_post, rc); + } + RETURN(0); err_post: @@ -1368,6 +1378,8 @@ static int filter_cleanup(struct obd_device *obd) } } + qctxt_cleanup(&filter->fo_quota_ctxt, 0); + ldlm_namespace_free(obd->obd_namespace, obd->obd_force); if (filter->fo_sb == NULL) @@ -1380,6 +1392,8 @@ static int filter_cleanup(struct obd_device *obd) filter_post(obd); shrink_dcache_parent(filter->fo_sb->s_root); + + DQUOT_OFF(filter->fo_sb); if (atomic_read(&filter->fo_vfsmnt->mnt_count) > 1) CERROR("%s: mount point %p busy, mnt_count: %d\n", @@ -1612,7 +1626,7 @@ static int filter_disconnect(struct obd_export *exp) fsfilt_sync(obd, obd->u.filter.fo_sb); /* flush any remaining cancel messages out to the target */ - ctxt = llog_get_context(obd, LLOG_UNLINK_REPL_CTXT); + ctxt = llog_get_context(obd, LLOG_MDS_OST_REPL_CTXT); err = llog_sync(ctxt, exp); if (err) CERROR("error flushing logs to MDS: rc %d\n", err); @@ -1678,33 +1692,51 @@ static int filter_getattr(struct obd_export *exp, struct obdo *oa, static int filter_setattr(struct obd_export *exp, struct obdo *oa, struct lov_stripe_md *md, struct obd_trans_info *oti) { + struct obd_device *obd; struct obd_run_ctxt saved; struct filter_obd *filter; struct dentry *dentry; struct iattr iattr; + uid_t orig_uid = 0; + gid_t orig_gid = 0; struct ldlm_res_id res_id = { .name = { oa->o_id } }; struct ldlm_resource *res; void *handle; + struct llog_cookie *fcc = NULL; int rc, rc2; ENTRY; - LASSERT(oti != NULL); - dentry = filter_oa2dentry(exp->exp_obd, oa); if (IS_ERR(dentry)) RETURN(PTR_ERR(dentry)); - filter = &exp->exp_obd->u.filter; + obd = exp->exp_obd; + filter = &obd->u.filter; iattr_from_obdo(&iattr, oa, oa->o_valid); push_ctxt(&saved, &exp->exp_obd->obd_ctxt, NULL); lock_kernel(); + if (oa->o_valid & OBD_MD_FLCOOKIE) { + OBD_ALLOC(fcc, 
sizeof(*fcc)); + if (fcc != NULL) + memcpy(fcc, obdo_logcookie(oa), sizeof(*fcc)); + } + if (iattr.ia_valid & ATTR_SIZE) down(&dentry->d_inode->i_sem); - handle = fsfilt_start(exp->exp_obd, dentry->d_inode, FSFILT_OP_SETATTR, - oti); + + if (iattr.ia_valid & (ATTR_UID | ATTR_GID)) { + orig_uid = dentry->d_inode->i_uid; + orig_gid = dentry->d_inode->i_gid; + handle = fsfilt_start_log(exp->exp_obd, dentry->d_inode, + FSFILT_OP_SETATTR, oti, 1); + } else { + handle = fsfilt_start(exp->exp_obd, dentry->d_inode, + FSFILT_OP_SETATTR, oti); + } + if (IS_ERR(handle)) GOTO(out_unlock, rc = PTR_ERR(handle)); @@ -1713,8 +1745,21 @@ static int filter_setattr(struct obd_export *exp, struct obdo *oa, rc = fsfilt_iocontrol(exp->exp_obd, dentry->d_inode, NULL, EXT3_IOC_SETFLAGS, (long)&iattr.ia_attr_flags); - else + else { rc = fsfilt_setattr(exp->exp_obd, dentry, handle, &iattr, 1); + /* set cancel cookie callback function */ + if (fcc != NULL) { + if (oti != NULL) + fsfilt_add_journal_cb(obd, 0, oti->oti_handle, + filter_cancel_cookies_cb, + fcc); + else + fsfilt_add_journal_cb(obd, 0, handle, + filter_cancel_cookies_cb, + fcc); + } + } + rc = filter_finish_transno(exp, oti, rc); rc2 = fsfilt_commit(exp->exp_obd, dentry->d_inode, handle, 0); if (rc2) { @@ -1737,7 +1782,9 @@ static int filter_setattr(struct obd_export *exp, struct obdo *oa, } oa->o_valid = OBD_MD_FLID; - obdo_from_inode(oa, dentry->d_inode, FILTER_VALID_FLAGS); + /* Quota release need uid/gid info */ + obdo_from_inode(oa, dentry->d_inode, + FILTER_VALID_FLAGS | OBD_MD_FLUID | OBD_MD_FLGID); out_unlock: if (iattr.ia_valid & ATTR_SIZE) @@ -1746,6 +1793,19 @@ out_unlock: pop_ctxt(&saved, &exp->exp_obd->obd_ctxt, NULL); f_dput(dentry); + + /* trigger quota release */ + if (rc == 0 && iattr.ia_valid & (ATTR_SIZE | ATTR_UID | ATTR_GID)) { + rc2 = qctxt_adjust_qunit(obd, &filter->fo_quota_ctxt, + oa->o_uid, oa->o_gid, 1); + if (rc2) + CERROR("error filter adjust qunit! 
(rc:%d)\n", rc2); + /* after owner changed, release quota for the original owner */ + rc2 = qctxt_adjust_qunit(obd, &filter->fo_quota_ctxt, + orig_uid, orig_gid, 1); + if (rc2) + CERROR("error filter adjust qunit! (rc:%d)\n", rc2); + } RETURN(rc); } @@ -2026,7 +2086,7 @@ static int filter_precreate(struct obd_device *obd, struct obdo *oa, GOTO(cleanup, rc = PTR_ERR(handle)); cleanup_phase = 3; - rc = ll_vfs_create(dparent->d_inode, dchild, S_IFREG, NULL); + rc = ll_vfs_create(dparent->d_inode, dchild, S_IFREG | 0666, NULL); if (rc) { CERROR("create failed rc = %d\n", rc); GOTO(cleanup, rc); @@ -2216,6 +2276,8 @@ static int filter_destroy(struct obd_export *exp, struct obdo *oa, memcpy(fcc, obdo_logcookie(oa), sizeof(*fcc)); } + /* Quota release need uid/gid of inode */ + obdo_from_inode(oa, dchild->d_inode, OBD_MD_FLUID|OBD_MD_FLGID); rc = filter_destroy_internal(obd, oa->o_id, dparent, dchild); cleanup: @@ -2250,6 +2312,14 @@ cleanup: LBUG(); } + /* trigger quota release */ + if (rc == 0) { + rc2 = qctxt_adjust_qunit(obd, &filter->fo_quota_ctxt, + oa->o_uid, oa->o_gid, 1); + if (rc2) + CERROR("error filter adjust qunit! 
(rc:%d)\n", rc2); + } + RETURN(rc); } @@ -2289,7 +2359,7 @@ static int filter_sync(struct obd_export *exp, struct obdo *oa, if (!oa || !(oa->o_valid & OBD_MD_FLID)) { rc = fsfilt_sync(exp->exp_obd, filter->fo_sb); /* flush any remaining cancel messages out to the target */ - ctxt = llog_get_context(exp->exp_obd, LLOG_UNLINK_REPL_CTXT); + ctxt = llog_get_context(exp->exp_obd, LLOG_MDS_OST_REPL_CTXT); llog_sync(ctxt, exp); RETURN(rc); } @@ -2388,8 +2458,14 @@ static int filter_set_info(struct obd_export *exp, __u32 keylen, CWARN("%s: received MDS connection ("LPX64")\n", obd->obd_name, conn.cookie); memcpy(&obd->u.filter.fo_mdc_conn, &conn, sizeof(conn)); - ctxt = llog_get_context(obd, LLOG_UNLINK_REPL_CTXT); + + /* setup llog imports */ + ctxt = llog_get_context(obd, LLOG_MDS_OST_REPL_CTXT); rc = llog_receptor_accept(ctxt, exp->exp_imp_reverse); + + /* setup the quota context import */ + obd->u.filter.fo_quota_ctxt.lqc_import = exp->exp_imp_reverse; + RETURN(rc); } @@ -2461,7 +2537,68 @@ int filter_iocontrol(unsigned int cmd, struct obd_export *exp, RETURN(0); } -static struct llog_operations filter_unlink_repl_logops; +static int filter_quotacheck(struct obd_export *exp, struct obd_quotactl *oqctl) +{ + struct obd_run_ctxt saved; + struct obd_device *obd = exp->exp_obd; + int rc; + + push_ctxt(&saved, &obd->obd_ctxt, NULL); + + rc = fsfilt_quotacheck(obd, obd->u.filter.fo_sb, oqctl); + if (rc) + CERROR("%s: fsfilt_quotacheck: %d\n", obd->obd_name, rc); + + pop_ctxt(&saved, &obd->obd_ctxt, NULL); + + RETURN(rc); +} + +static int filter_quotactl(struct obd_export *exp, struct obd_quotactl *oqctl) +{ + struct obd_device *obd = exp->exp_obd; + struct obd_run_ctxt saved; + int rc = 0; + ENTRY; + + if (oqctl->qc_cmd == Q_QUOTAON || oqctl->qc_cmd == Q_QUOTAOFF || + oqctl->qc_cmd == Q_GETOINFO || oqctl->qc_cmd == Q_GETOQUOTA || + oqctl->qc_cmd == Q_GETQUOTA) { + push_ctxt(&saved, &obd->obd_ctxt, NULL); + rc = fsfilt_quotactl(obd, obd->u.filter.fo_sb, oqctl); + 
pop_ctxt(&saved, &obd->obd_ctxt, NULL); + } else if (oqctl->qc_cmd == Q_INITQUOTA) { + unsigned int uid = 0, gid = 0; + + /* initialize quota limit to MIN_QLIMIT */ + LASSERT(oqctl->qc_dqblk.dqb_valid == QIF_BLIMITS); + LASSERT(oqctl->qc_dqblk.dqb_bhardlimit == MIN_QLIMIT); + LASSERT(oqctl->qc_dqblk.dqb_bsoftlimit == 0); + oqctl->qc_cmd = Q_SETQUOTA; + rc = fsfilt_quotactl(obd, obd->u.filter.fo_sb, oqctl); + /* this value will be replied to client, we must restore it */ + oqctl->qc_cmd = Q_INITQUOTA; + if (rc) + RETURN(rc); + + /* trigger qunit pre-acquire */ + if (oqctl->qc_type == USRQUOTA) + uid = oqctl->qc_id; + else + gid = oqctl->qc_id; + + rc = qctxt_adjust_qunit(obd, &obd->u.filter.fo_quota_ctxt, + uid, gid, 1); + } else { + CERROR("%s: unsupported filter_quotactl command: %d\n", + obd->obd_name, oqctl->qc_cmd); + LBUG(); + } + + RETURN(rc); +} + +static struct llog_operations filter_mds_ost_repl_logops; static struct llog_operations filter_size_orig_logops = { lop_setup: llog_obd_origin_setup, lop_cleanup: llog_obd_origin_cleanup, @@ -2475,18 +2612,19 @@ static int filter_llog_init(struct obd_device *obd, struct obd_device *tgt, int rc; ENTRY; - filter_unlink_repl_logops = llog_client_ops; - filter_unlink_repl_logops.lop_cancel = llog_obd_repl_cancel; - filter_unlink_repl_logops.lop_connect = llog_repl_connect; - filter_unlink_repl_logops.lop_sync = llog_obd_repl_sync; + filter_mds_ost_repl_logops = llog_client_ops; + filter_mds_ost_repl_logops.lop_cancel = llog_obd_repl_cancel; + filter_mds_ost_repl_logops.lop_connect = llog_repl_connect; + filter_mds_ost_repl_logops.lop_sync = llog_obd_repl_sync; - rc = llog_setup(obd, LLOG_UNLINK_REPL_CTXT, tgt, 0, NULL, - &filter_unlink_repl_logops); + rc = llog_setup(obd, LLOG_MDS_OST_REPL_CTXT, tgt, 0, NULL, + &filter_mds_ost_repl_logops); if (rc) RETURN(rc); + /* FIXME - assign unlink_cb for filter's recovery */ - ctxt = llog_get_context(obd, LLOG_UNLINK_REPL_CTXT); - ctxt->llog_proc_cb = filter_recov_log_unlink_cb; 
+ ctxt = llog_get_context(obd, LLOG_MDS_OST_REPL_CTXT); + ctxt->llog_proc_cb = filter_recov_log_mds_ost_cb; rc = llog_setup(obd, LLOG_SIZE_ORIG_CTXT, tgt, 0, NULL, &filter_size_orig_logops); @@ -2499,7 +2637,7 @@ static int filter_llog_finish(struct obd_device *obd, int count) int rc = 0, rc2 = 0; ENTRY; - ctxt = llog_get_context(obd, LLOG_UNLINK_REPL_CTXT); + ctxt = llog_get_context(obd, LLOG_MDS_OST_REPL_CTXT); if (ctxt) rc = llog_cleanup(ctxt); @@ -2546,6 +2684,8 @@ static struct obd_ops filter_obd_ops = { .o_llog_init = filter_llog_init, .o_llog_finish = filter_llog_finish, .o_iocontrol = filter_iocontrol, + .o_quotacheck = filter_quotacheck, + .o_quotactl = filter_quotactl, }; static struct obd_ops filter_sanobd_ops = { diff --git a/lustre/obdfilter/filter_internal.h b/lustre/obdfilter/filter_internal.h index da3d9ba..e90fb60 100644 --- a/lustre/obdfilter/filter_internal.h +++ b/lustre/obdfilter/filter_internal.h @@ -160,7 +160,7 @@ int filter_log_sz_change(struct llog_handle *cathandle, //int filter_get_catalog(struct obd_device *); void filter_cancel_cookies_cb(struct obd_device *obd, __u64 transno, void *cb_data, int error); -int filter_recov_log_unlink_cb(struct llog_handle *llh, +int filter_recov_log_mds_ost_cb(struct llog_handle *llh, struct llog_rec_hdr *rec, void *data); /* filter_san.c */ diff --git a/lustre/obdfilter/filter_io.c b/lustre/obdfilter/filter_io.c index a1eb318..d60f811 100644 --- a/lustre/obdfilter/filter_io.c +++ b/lustre/obdfilter/filter_io.c @@ -792,7 +792,8 @@ void filter_grant_commit(struct obd_export *exp, int niocount, int filter_commitrw(int cmd, struct obd_export *exp, struct obdo *oa, int objcount, struct obd_ioobj *obj, int niocount, - struct niobuf_local *res, struct obd_trans_info *oti,int rc) + struct niobuf_local *res, struct obd_trans_info *oti, + int rc) { if (cmd == OBD_BRW_WRITE) return filter_commitrw_write(exp, oa, objcount, obj, niocount, diff --git a/lustre/obdfilter/filter_io_26.c b/lustre/obdfilter/filter_io_26.c 
index 722e23d..e477545 100644 --- a/lustre/obdfilter/filter_io_26.c +++ b/lustre/obdfilter/filter_io_26.c @@ -34,6 +34,7 @@ #include #include +#include #include "filter_internal.h" #warning "implement writeback mode -bzzz" @@ -47,6 +48,7 @@ struct dio_request { int dr_max_pages; int dr_npages; int dr_error; + unsigned long dr_flag; /* indicating if there is client cache page in this rpc */ struct page **dr_pages; unsigned long *dr_blocks; spinlock_t dr_lock; @@ -365,6 +367,71 @@ static void filter_clear_page_cache(struct inode *inode, struct bio *iobuf) #endif } +static int filter_quota_enforcement(struct obd_device *obd, + unsigned int fsuid, unsigned int fsgid, + struct obd_ucred **ret_uc) +{ + struct filter_obd *filter = &obd->u.filter; + struct obd_ucred *uc = NULL; + ENTRY; + + if (!sb_any_quota_enabled(filter->fo_sb)) + RETURN(0); + + OBD_ALLOC(uc, sizeof(*uc)); + if (!uc) + RETURN(-ENOMEM); + *ret_uc = uc; + + uc->ouc_fsuid = fsuid; + uc->ouc_fsgid = fsgid; + uc->ouc_cap = current->cap_effective; + if (!fsuid) + cap_raise(uc->ouc_cap, CAP_SYS_RESOURCE); + else + cap_lower(uc->ouc_cap, CAP_SYS_RESOURCE); + + RETURN(0); +} + +static int filter_get_quota_flag(struct obd_device *obd, + struct obdo *oa) +{ + struct filter_obd *filter = &obd->u.filter; + int cnt; + int rc = 0, err; + ENTRY; + + if (!sb_any_quota_enabled(filter->fo_sb)) + RETURN(rc); + + oa->o_flags = QUOTA_OK; + + for (cnt = 0; cnt < MAXQUOTAS; cnt++) { + struct obd_quotactl oqctl; + + oqctl.qc_cmd = Q_GETQUOTA; + oqctl.qc_type = cnt; + oqctl.qc_id = (cnt == USRQUOTA) ? oa->o_uid : oa->o_gid; + err = fsfilt_quotactl(obd, filter->fo_sb, &oqctl); + if (err) { + if (!rc) + rc = err; + continue; + } + + /* set over quota flags for a uid/gid */ + oa->o_valid |= (cnt == USRQUOTA) ? + OBD_MD_FLUSRQUOTA : OBD_MD_FLGRPQUOTA; + if (oqctl.qc_dqblk.dqb_bhardlimit && + (toqb(oqctl.qc_dqblk.dqb_curspace) > oqctl.qc_dqblk.dqb_bhardlimit)) + oa->o_flags |= (cnt == USRQUOTA) ? 
+ OBD_FL_NO_USRQUOTA : OBD_FL_NO_GRPQUOTA; + } + + RETURN(rc); +} + /* Must be called with i_sem taken for writes; this will drop it */ int filter_direct_io(int rw, struct dentry *dchild, void *iobuf, struct obd_export *exp, struct iattr *attr, @@ -374,6 +441,7 @@ int filter_direct_io(int rw, struct dentry *dchild, void *iobuf, struct dio_request *dreq = iobuf; struct inode *inode = dchild->d_inode; int blocks_per_page = PAGE_SIZE >> inode->i_blkbits; + struct lustre_quota_ctxt *qctxt = &obd->u.filter.fo_quota_ctxt; int rc, rc2; ENTRY; @@ -386,12 +454,33 @@ int filter_direct_io(int rw, struct dentry *dchild, void *iobuf, if (dreq->dr_npages == 0) RETURN(0); + /* If there is any page in this write rpc that comes from client + * cache, we write the whole rpc without quota limit */ + if (dreq->dr_flag & OBD_BRW_FROM_GRANT) { + cap_raise(current->cap_effective, CAP_SYS_RESOURCE); + dreq->dr_flag &= ~OBD_BRW_FROM_GRANT; + } + +remap: rc = fsfilt_map_inode_pages(obd, inode, dreq->dr_pages, dreq->dr_npages, dreq->dr_blocks, obdfilter_created_scratchpad, rw == OBD_BRW_WRITE, NULL); + if (rc == -EDQUOT) { + LASSERT(rw == OBD_BRW_WRITE && + !cap_raised(current->cap_effective, CAP_SYS_RESOURCE)); + + /* Unfortunately, if quota master is too busy to handle the + * pre-dqacq in time or this user has exceeded quota limit, we + * have to wait for the completion of in flight dqacq/dqrel, + * then try again */ + if (qctxt_wait_on_dqacq(obd, qctxt, inode->i_uid, + inode->i_gid, 1) == -EAGAIN) + goto remap; + } + if (rw == OBD_BRW_WRITE) { if (rc == 0) { filter_tally_write(&obd->u.filter, @@ -415,7 +504,6 @@ int filter_direct_io(int rw, struct dentry *dchild, void *iobuf, rc = rc2; if (rc != 0) RETURN(rc); - } /* This is nearly osync_inode, without the waiting @@ -474,6 +562,8 @@ int filter_commitrw_write(struct obd_export *exp, struct obdo *oa, unsigned long now = jiffies; int i, err, cleanup_phase = 0; struct obd_device *obd = exp->exp_obd; + struct filter_obd *filter = 
&obd->u.filter; + struct obd_ucred *uc = NULL; int total_size = 0; ENTRY; @@ -517,9 +607,20 @@ int filter_commitrw_write(struct obd_export *exp, struct obdo *oa, this_size = lnb->offset + lnb->len; if (this_size > iattr.ia_size) iattr.ia_size = this_size; + /* if one page is a write-back page from client cache, + * then mark that the whole io request can be over quota */ + if (lnb->flags & OBD_BRW_FROM_GRANT) + dreq->dr_flag |= OBD_BRW_FROM_GRANT; } - push_ctxt(&saved, &obd->obd_ctxt, NULL); + /* The client store the user credit information fsuid and fsgid + * in oa->o_uid and oa->o_gid. In case of quota enabled, we use + * them to build the obd_ucred so as to enforce oss quota check */ + rc = filter_quota_enforcement(obd, oa->o_uid, oa->o_gid, &uc); + if (rc) + GOTO(cleanup, rc); + + push_ctxt(&saved, &obd->obd_ctxt, uc); cleanup_phase = 2; down(&inode->i_sem); @@ -542,7 +643,12 @@ int filter_commitrw_write(struct obd_export *exp, struct obdo *oa, rc = filter_direct_io(OBD_BRW_WRITE, res->dentry, dreq, exp, &iattr, oti, NULL); if (rc == 0) - obdo_from_inode(oa, inode, FILTER_VALID_FLAGS); + obdo_from_inode(oa, inode, + FILTER_VALID_FLAGS | OBD_MD_FLUID | OBD_MD_FLGID); + else + obdo_from_inode(oa, inode, OBD_MD_FLUID | OBD_MD_FLGID); + + filter_get_quota_flag(obd, oa); fsfilt_check_slow(now, obd_timeout, "direct_io"); @@ -560,7 +666,9 @@ cleanup: switch (cleanup_phase) { case 2: - pop_ctxt(&saved, &obd->obd_ctxt, NULL); + pop_ctxt(&saved, &obd->obd_ctxt, uc); + if (uc) + OBD_FREE(uc, sizeof(*uc)); LASSERT(current->journal_info == NULL); case 1: filter_free_iobuf(dreq); @@ -569,5 +677,12 @@ cleanup: f_dput(res->dentry); } + /* trigger quota pre-acquire */ + if (rc == 0) { + err = qctxt_adjust_qunit(obd, &filter->fo_quota_ctxt, + oa->o_uid, oa->o_gid, 1); + if (err) + CERROR("error filter ajust qunit! 
(rc:%d)\n", err); + } RETURN(rc); } diff --git a/lustre/obdfilter/filter_log.c b/lustre/obdfilter/filter_log.c index d8fe7c8..34ad008 100644 --- a/lustre/obdfilter/filter_log.c +++ b/lustre/obdfilter/filter_log.c @@ -115,36 +115,116 @@ void filter_cancel_cookies_cb(struct obd_device *obd, __u64 transno, /* Callback for processing the unlink log record received from MDS by * llog_client_api. */ -int filter_recov_log_unlink_cb(struct llog_handle *llh, - struct llog_rec_hdr *rec, void *data) +static int filter_recov_log_unlink_cb(struct llog_ctxt *ctxt, + struct llog_rec_hdr *rec, + struct llog_cookie *cookie) { - struct llog_ctxt *ctxt = llh->lgh_ctxt; struct obd_device *obd = ctxt->loc_obd; struct obd_export *exp = obd->obd_self_export; - struct llog_cookie cookie; - struct llog_gen_rec *lgr; struct llog_unlink_rec *lur; struct obdo *oa; obd_id oid; int rc = 0; ENTRY; + lur = (struct llog_unlink_rec *)rec; + oa = obdo_alloc(); + if (oa == NULL) + RETURN(-ENOMEM); + oa->o_valid |= OBD_MD_FLCOOKIE; + oa->o_id = lur->lur_oid; + oa->o_gr = lur->lur_ogen; + memcpy(obdo_logcookie(oa), cookie, sizeof(*cookie)); + oid = oa->o_id; + + rc = obd_destroy(exp, oa, NULL, NULL); + obdo_free(oa); + if (rc == -ENOENT) { + CDEBUG(D_HA, "object already removed, send cookie\n"); + llog_cancel(ctxt, NULL, 1, cookie, 0); + RETURN(0); + } + + if (rc == 0) + CDEBUG(D_HA, "object: "LPU64" in record is destroyed\n", oid); + + RETURN(rc); +} + +/* Callback for processing the setattr log record received from MDS by + * llog_client_api. 
+ */ +static int filter_recov_log_setattr_cb(struct llog_ctxt *ctxt, + struct llog_rec_hdr *rec, + struct llog_cookie *cookie) +{ + struct obd_device *obd = ctxt->loc_obd; + struct obd_export *exp = obd->obd_self_export; + struct llog_setattr_rec *lsr; + struct obdo *oa; + obd_id oid; + int rc = 0; + ENTRY; + + lsr = (struct llog_setattr_rec *)rec; + oa = obdo_alloc(); + + oa->o_valid |= (OBD_MD_FLID | OBD_MD_FLUID | OBD_MD_FLGID | + OBD_MD_FLCOOKIE); + oa->o_id = lsr->lsr_oid; + oa->o_gr = lsr->lsr_ogen; + oa->o_uid = lsr->lsr_uid; + oa->o_gid = lsr->lsr_gid; + memcpy(obdo_logcookie(oa), cookie, sizeof(*cookie)); + oid = oa->o_id; + + rc = obd_setattr(exp, oa, NULL, NULL); + obdo_free(oa); + + if (rc == -ENOENT) { + CDEBUG(D_HA, "object already removed, send cookie\n"); + llog_cancel(ctxt, NULL, 1, cookie, 0); + RETURN(0); + } + + if (rc == 0) + CDEBUG(D_HA, "object: "LPU64" in record is chown/chgrp\n", oid); + + RETURN(rc); +} + +int filter_recov_log_mds_ost_cb(struct llog_handle *llh, + struct llog_rec_hdr *rec, void *data) +{ + struct llog_ctxt *ctxt = llh->lgh_ctxt; + struct llog_cookie cookie; + int rc = 0; + ENTRY; + if (!(llh->lgh_hdr->llh_flags & LLOG_F_IS_PLAIN)) { CERROR("log is not plain\n"); RETURN(-EINVAL); } if (rec->lrh_type != MDS_UNLINK_REC && + rec->lrh_type != MDS_SETATTR_REC && rec->lrh_type != LLOG_GEN_REC) { CERROR("log record type error\n"); RETURN(-EINVAL); } - + cookie.lgc_lgl = llh->lgh_id; - cookie.lgc_subsys = LLOG_UNLINK_ORIG_CTXT; + cookie.lgc_subsys = LLOG_MDS_OST_ORIG_CTXT; cookie.lgc_index = rec->lrh_index; - if (rec->lrh_type == LLOG_GEN_REC) { - lgr = (struct llog_gen_rec *)rec; + switch (rec->lrh_type) { + case MDS_UNLINK_REC: + rc = filter_recov_log_unlink_cb(ctxt, rec, &cookie); + break; + case MDS_SETATTR_REC: + rc = filter_recov_log_setattr_cb(ctxt, rec, &cookie); + break; + case LLOG_GEN_REC: { + struct llog_gen_rec *lgr = (struct llog_gen_rec *)rec; if (llog_gen_lt(lgr->lgr_gen, ctxt->loc_gen)) rc = 0; else @@ -152,28 
+232,10 @@ int filter_recov_log_unlink_cb(struct llog_handle *llh, CWARN("fetch generation log, send cookie\n"); llog_cancel(ctxt, NULL, 1, &cookie, 0); RETURN(rc); + } + default: + break; } - lur = (struct llog_unlink_rec *)rec; - oa = obdo_alloc(); - if (oa == NULL) - RETURN(-ENOMEM); - oa->o_valid |= OBD_MD_FLCOOKIE; - oa->o_id = lur->lur_oid; - oa->o_gr = lur->lur_ogen; - memcpy(obdo_logcookie(oa), &cookie, sizeof(cookie)); - oid = oa->o_id; - - rc = obd_destroy(exp, oa, NULL, NULL); - obdo_free(oa); - if (rc == -ENOENT) { - CDEBUG(D_HA, "object already removed, send cookie\n"); - llog_cancel(ctxt, NULL, 1, &cookie, 0); - RETURN(0); - } - - if (rc == 0) - CDEBUG(D_HA, "object: "LPU64" in record is destroyed\n", oid); - RETURN(rc); } diff --git a/lustre/obdfilter/lproc_obdfilter.c b/lustre/obdfilter/lproc_obdfilter.c index 2fc8420..61d0fb8 100644 --- a/lustre/obdfilter/lproc_obdfilter.c +++ b/lustre/obdfilter/lproc_obdfilter.c @@ -121,6 +121,121 @@ int lprocfs_filter_wr_readcache(struct file *file, const char *buffer, return count; } +static int lprocfs_filter_rd_bunit(char *page, char **start, off_t off, int count, + int *eof, void *data) +{ + struct obd_device *obd = (struct obd_device *)data; + LASSERT(obd != NULL); + + return snprintf(page, count, "%lu\n", + obd->u.filter.fo_quota_ctxt.lqc_bunit_sz); +} + +static int lprocfs_filter_rd_iunit(char *page, char **start, off_t off, int count, + int *eof, void *data) +{ + struct obd_device *obd = (struct obd_device *)data; + LASSERT(obd != NULL); + + return snprintf(page, count, "%lu\n", + obd->u.filter.fo_quota_ctxt.lqc_iunit_sz); +} + +static int lprocfs_filter_wr_bunit(struct file *file, const char *buffer, + unsigned long count, void *data) +{ + struct obd_device *obd = (struct obd_device *)data; + int val, rc = 0; + LASSERT(obd != NULL); + + rc = lprocfs_write_helper(buffer, count, &val); + if (rc) + return rc; + + if (val <= 0 || val % QUOTABLOCK_SIZE || /* > 0, block-aligned */ + val <= obd->u.filter.fo_quota_ctxt.lqc_btune_sz) + return 
-EINVAL; + + obd->u.filter.fo_quota_ctxt.lqc_bunit_sz = val; + return count; +} + +static int lprocfs_filter_wr_iunit(struct file *file, const char *buffer, + unsigned long count, void *data) +{ + struct obd_device *obd = (struct obd_device *)data; + int val, rc = 0; + LASSERT(obd != NULL); + + rc = lprocfs_write_helper(buffer, count, &val); + if (rc) + return rc; + /* sizes must be positive; val is a signed int */ + if (val <= 0 || val <= obd->u.filter.fo_quota_ctxt.lqc_itune_sz) + return -EINVAL; + + obd->u.filter.fo_quota_ctxt.lqc_iunit_sz = val; + return count; +} + +static int lprocfs_filter_rd_btune(char *page, char **start, off_t off, int count, + int *eof, void *data) +{ + struct obd_device *obd = (struct obd_device *)data; + LASSERT(obd != NULL); + + return snprintf(page, count, "%lu\n", + obd->u.filter.fo_quota_ctxt.lqc_btune_sz); +} + +static int lprocfs_filter_rd_itune(char *page, char **start, off_t off, int count, + int *eof, void *data) +{ + struct obd_device *obd = (struct obd_device *)data; + LASSERT(obd != NULL); + + return snprintf(page, count, "%lu\n", + obd->u.filter.fo_quota_ctxt.lqc_itune_sz); +} + +static int lprocfs_filter_wr_btune(struct file *file, const char *buffer, + unsigned long count, void *data) +{ + struct obd_device *obd = (struct obd_device *)data; + int val, rc = 0; + LASSERT(obd != NULL); + + rc = lprocfs_write_helper(buffer, count, &val); + if (rc) + return rc; + + if (val <= QUOTABLOCK_SIZE * MIN_QLIMIT || val % QUOTABLOCK_SIZE || + val >= obd->u.filter.fo_quota_ctxt.lqc_bunit_sz) + return -EINVAL; + + obd->u.filter.fo_quota_ctxt.lqc_btune_sz = val; + return count; +} + +static int lprocfs_filter_wr_itune(struct file *file, const char *buffer, + unsigned long count, void *data) +{ + struct obd_device *obd = (struct obd_device *)data; + int val, rc = 0; + LASSERT(obd != NULL); + + rc = lprocfs_write_helper(buffer, count, &val); + if (rc) + return rc; + + if (val <= MIN_QLIMIT || + val >= obd->u.filter.fo_quota_ctxt.lqc_iunit_sz) + return -EINVAL; + + 
obd->u.filter.fo_quota_ctxt.lqc_itune_sz = val; + return count; +} + static struct lprocfs_vars lprocfs_obd_vars[] = { { "uuid", lprocfs_rd_uuid, 0, 0 }, { "blocksize", lprocfs_rd_blksize, 0, 0 }, @@ -142,6 +257,15 @@ static struct lprocfs_vars lprocfs_obd_vars[] = { { "readcache_max_filesize", lprocfs_filter_rd_readcache, lprocfs_filter_wr_readcache, 0 }, + { "quota_bunit_sz", lprocfs_filter_rd_bunit, + lprocfs_filter_wr_bunit, 0}, + { "quota_btune_sz", lprocfs_filter_rd_btune, + lprocfs_filter_wr_btune, 0}, + { "quota_iunit_sz", lprocfs_filter_rd_iunit, + lprocfs_filter_wr_iunit, 0}, + { "quota_itune_sz", lprocfs_filter_rd_itune, + lprocfs_filter_wr_itune, 0}, + { 0 } }; diff --git a/lustre/osc/Makefile.in b/lustre/osc/Makefile.in index 568a725..52fece6 100644 --- a/lustre/osc/Makefile.in +++ b/lustre/osc/Makefile.in @@ -1,4 +1,4 @@ MODULES := osc -osc-objs := osc_request.o lproc_osc.o osc_lib.o osc_create.o +osc-objs := osc_request.o lproc_osc.o osc_lib.o osc_create.o osc_quota.o @INCLUDE_RULES@ diff --git a/lustre/osc/osc_internal.h b/lustre/osc/osc_internal.h index 20851a0..8fe19d9 100644 --- a/lustre/osc/osc_internal.h +++ b/lustre/osc/osc_internal.h @@ -52,6 +52,14 @@ int osc_real_create(struct obd_export *exp, struct obdo *oa, struct lov_stripe_md **ea, struct obd_trans_info *oti); void oscc_init(struct obd_device *obd); void osc_wake_cache_waiters(struct client_obd *cli); +int osc_get_quota_flag(struct client_obd *cli, unsigned int uid, + unsigned int gid); +int osc_set_quota_flag(struct client_obd *cli, + unsigned int uid, unsigned int gid, + obd_flag valid, obd_flag flags); +int osc_qinfo_cleanup(struct client_obd *cli); +int osc_qinfo_init(void); +void osc_qinfo_exit(void); #ifdef __KERNEL__ int lproc_osc_attach_seqstat(struct obd_device *dev); diff --git a/lustre/osc/osc_quota.c b/lustre/osc/osc_quota.c new file mode 100644 index 0000000..e84027b --- /dev/null +++ b/lustre/osc/osc_quota.c @@ -0,0 +1,244 @@ +/* -*- mode: c; c-basic-offset: 8; 
indent-tabs-mode: nil; -*- + * vim:expandtab:shiftwidth=8:tabstop=8: + * + * Copyright (c) 2003 Cluster File Systems, Inc. + * + * This file is part of Lustre, http://www.lustre.org. + * + * Lustre is free software; you can redistribute it and/or + * modify it under the terms of version 2 of the GNU General Public + * License as published by the Free Software Foundation. + * + * Lustre is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with Lustre; if not, write to the Free Software + * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. + */ + +#ifndef EXPORT_SYMTAB +# define EXPORT_SYMTAB +#endif +#define DEBUG_SUBSYSTEM S_OSC + +#ifdef __KERNEL__ +# include +# include +# include +# include +# include +# include +# include +# if (LINUX_VERSION_CODE >= KERNEL_VERSION(2,5,0)) +# include +# include +# else +# include +# endif +#endif + +#include "osc_internal.h" + +struct osc_quota_info { + struct list_head oqi_hash; /* hash list */ + struct client_obd *oqi_cli; /* osc obd */ + unsigned int oqi_id; /* uid/gid of a file */ + short oqi_type; /* quota type */ + unsigned long oqi_flag; /* flag, NO_QUOTA */ +}; + +spinlock_t qinfo_list_lock = SPIN_LOCK_UNLOCKED; + +static struct list_head qinfo_hash[NR_DQHASH]; +/* SLAB cache for client quota context */ +kmem_cache_t *qinfo_cachep = NULL; + +static inline int const hashfn(struct client_obd *cli, + unsigned long id, + int type) +{ + unsigned long tmp = ((unsigned long)cli>>6) ^ id; + tmp = (tmp * (MAXQUOTAS - type)) % NR_DQHASH; + return tmp; +} + +static inline void insert_qinfo_hash(struct osc_quota_info *oqi) +{ + struct list_head *head = qinfo_hash + + hashfn(oqi->oqi_cli, oqi->oqi_id, oqi->oqi_type); + list_add(&oqi->oqi_hash, head); +} + +static 
inline void remove_qinfo_hash(struct osc_quota_info *oqi) +{ + list_del_init(&oqi->oqi_hash); +} + +static inline struct osc_quota_info *find_qinfo(struct client_obd *cli, + unsigned int id, int type) +{ + unsigned int hashent = hashfn(cli, id, type); + struct list_head *head; + struct osc_quota_info *oqi; + + for (head = qinfo_hash[hashent].next; + head != qinfo_hash+hashent; head = head->next) { + oqi = list_entry(head, struct osc_quota_info, oqi_hash); + LASSERT(oqi->oqi_flag == NO_QUOTA); /* compare, not assign */ + if (oqi->oqi_cli == cli && + oqi->oqi_id == id && oqi->oqi_type == type) + return oqi; + } + return NULL; +} + +static struct osc_quota_info *alloc_qinfo(struct client_obd *cli, + unsigned int id, int type) +{ + struct osc_quota_info *oqi; + ENTRY; + + OBD_SLAB_ALLOC(oqi, qinfo_cachep, SLAB_KERNEL, + sizeof(*oqi)); + if(!oqi) + RETURN(NULL); + + INIT_LIST_HEAD(&oqi->oqi_hash); + oqi->oqi_cli = cli; + oqi->oqi_id = id; + oqi->oqi_type = type; + + RETURN(oqi); +} + +static void free_qinfo(struct osc_quota_info *oqi) +{ + OBD_SLAB_FREE(oqi, qinfo_cachep, sizeof(*oqi)); +} + +int osc_get_quota_flag(struct client_obd *cli, + unsigned int uid, unsigned int gid) +{ + unsigned int id; + int cnt, rc = QUOTA_OK; + ENTRY; + + spin_lock(&qinfo_list_lock); + for (cnt = 0; cnt < MAXQUOTAS; cnt++) { + struct osc_quota_info *oqi = NULL; + + id = (cnt == USRQUOTA) ? uid : gid; + oqi = find_qinfo(cli, id, cnt); + if (oqi) { + rc = NO_QUOTA; + break; + } + } + spin_unlock(&qinfo_list_lock); + + RETURN(rc); +} + +int osc_set_quota_flag(struct client_obd *cli, + unsigned int uid, unsigned int gid, + obd_flag valid, obd_flag flags) +{ + unsigned int id; + obd_flag noquota; + int cnt, rc = 0; + ENTRY; + + spin_lock(&qinfo_list_lock); + + for (cnt = 0; cnt < MAXQUOTAS; cnt++) { + struct osc_quota_info *oqi = NULL; + + if (!(valid & ((cnt == USRQUOTA) ? + OBD_MD_FLUSRQUOTA : OBD_MD_FLGRPQUOTA))) + continue; + + id = (cnt == USRQUOTA) ? uid : gid; + noquota = (cnt == USRQUOTA) ? 
+ (flags & OBD_FL_NO_USRQUOTA) : (flags & OBD_FL_NO_GRPQUOTA); + + oqi = find_qinfo(cli, id, cnt); + + if (oqi && !noquota) { + remove_qinfo_hash(oqi); + free_qinfo(oqi); + } else if (!oqi && noquota) { + oqi = alloc_qinfo(cli, id, cnt); + if (!oqi) { + CERROR("not enough mem!\n"); + rc = -ENOMEM; + break; + } + oqi->oqi_flag = NO_QUOTA; + insert_qinfo_hash(oqi); + } + } + + spin_unlock(&qinfo_list_lock); + + RETURN(rc); +} + +int osc_qinfo_cleanup(struct client_obd *cli) +{ + struct osc_quota_info *oqi, *n; + int i; + ENTRY; + + spin_lock(&qinfo_list_lock); + for (i = 0; i < NR_DQHASH; i++) { + list_for_each_entry_safe(oqi, n, &qinfo_hash[i], oqi_hash) { + if (oqi->oqi_cli != cli) + continue; + remove_qinfo_hash(oqi); + free_qinfo(oqi); + } + } + spin_unlock(&qinfo_list_lock); + + RETURN(0); +} + +int osc_qinfo_init(void) +{ + int i; + ENTRY; + + LASSERT(qinfo_cachep == NULL); + qinfo_cachep = kmem_cache_create("osc_quota_info", + sizeof(struct osc_quota_info), + 0, 0, NULL, NULL); + if (!qinfo_cachep) + RETURN(-ENOMEM); + + for (i = 0; i < NR_DQHASH; i++) + INIT_LIST_HEAD(qinfo_hash + i); + + RETURN(0); +} + +void osc_qinfo_exit(void) +{ + struct osc_quota_info *oqi, *n; + int i; + ENTRY; + + spin_lock(&qinfo_list_lock); + for (i = 0; i < NR_DQHASH; i++) { + list_for_each_entry_safe(oqi, n, &qinfo_hash[i], oqi_hash) { + remove_qinfo_hash(oqi); + free_qinfo(oqi); + } + } + spin_unlock(&qinfo_list_lock); + + LASSERTF(kmem_cache_destroy(qinfo_cachep) == 0, + "couldn't destroy osc quota info slab\n"); +} diff --git a/lustre/osc/osc_request.c b/lustre/osc/osc_request.c index c6f2a10..95cd361 100644 --- a/lustre/osc/osc_request.c +++ b/lustre/osc/osc_request.c @@ -278,6 +278,35 @@ out: RETURN(0); } +static int osc_setattr_async(struct obd_export *exp, struct obdo *oa, + struct lov_stripe_md *md, struct obd_trans_info *oti) +{ + struct ptlrpc_request *request; + struct ost_body *body; + int rc = 0, size = sizeof(*body); + ENTRY; + + LASSERT(oti); + + request = 
ptlrpc_prep_req(class_exp2cliimp(exp), OST_SETATTR, 1, + &size, NULL); + if (!request) + RETURN(-ENOMEM); + + body = lustre_msg_buf(request->rq_reqmsg, 0, sizeof(*body)); + + if (oa->o_valid & OBD_MD_FLCOOKIE) + memcpy(obdo_logcookie(oa), oti->oti_logcookies, + sizeof(*oti->oti_logcookies)); + + memcpy(&body->oa, oa, sizeof(*oa)); + request->rq_replen = lustre_msg_size(1, &size); + /* do mds to ost setattr asynchronouly */ + ptlrpcd_add_req(request); + + RETURN(rc); +} + int osc_real_create(struct obd_export *exp, struct obdo *oa, struct lov_stripe_md **ea, struct obd_trans_info *oti) { @@ -848,6 +877,12 @@ static int osc_brw_fini_request(struct ptlrpc_request *req, struct obdo *oa, RETURN(-EPROTO); } + /* set/clear over quota flag for a uid/gid */ + if (req->rq_reqmsg->opc == OST_WRITE && + body->oa.o_valid & (OBD_MD_FLUSRQUOTA | OBD_MD_FLGRPQUOTA)) + osc_set_quota_flag(cli, body->oa.o_uid, body->oa.o_gid, + body->oa.o_valid, body->oa.o_flags); + osc_update_grant(cli, body); memcpy(oa, &body->oa, sizeof(*oa)); @@ -1343,6 +1378,18 @@ static struct ptlrpc_request *osc_build_req(struct client_obd *cli, GOTO(out, req = ERR_PTR(rc)); } + /* To enforce quota on oss, we need pass the client's user credit + * information to ost. We chose to store the fsuid and fsgid in + * oa->o_uid and oa->o_gid since the two fields haven't been used + * at present. And we chose one page's user credit information as + * the whole rpc's credit information. 
FIXME */ + if (cmd == OBD_BRW_WRITE) { + struct obd_ucred ouc; + ops->ap_get_ucred(caller_data, &ouc); + oa->o_uid = ouc.ouc_fsuid; + oa->o_gid = ouc.ouc_fsgid; + } + LASSERT(sizeof(*aa) <= sizeof(req->rq_async_args)); aa = (struct osc_brw_async_args *)&req->rq_async_args; aa->aa_oa = oa; @@ -1891,7 +1938,7 @@ static int osc_queue_async_io(struct obd_export *exp, struct lov_stripe_md *lsm, struct client_obd *cli = &exp->exp_obd->u.cli; struct osc_async_page *oap; struct loi_oap_pages *lop; - int rc; + int rc = 0; ENTRY; oap = oap_from_cookie(cookie); @@ -1906,6 +1953,25 @@ static int osc_queue_async_io(struct obd_export *exp, struct lov_stripe_md *lsm, !list_empty(&oap->oap_rpc_item)) RETURN(-EBUSY); + /* check if the file's owner/group is over quota */ + if (cmd == OBD_BRW_WRITE){ + struct obd_async_page_ops *ops; + struct obdo *oa = NULL; + + oa = obdo_alloc(); + if (oa == NULL) + RETURN(-ENOMEM); + + ops = oap->oap_caller_ops; + ops->ap_fill_obdo(oap->oap_caller_data, cmd, oa); + if (osc_get_quota_flag(cli, oa->o_uid, oa->o_gid) == NO_QUOTA) + rc = -EDQUOT; + + obdo_free(oa); + if (rc) + RETURN(rc); + } + if (loi == NULL) loi = &lsm->lsm_oinfo[0]; @@ -2714,6 +2780,98 @@ static int osc_getstripe(struct lov_stripe_md *lsm, struct lov_user_md *lump) RETURN(rc); } +static int osc_quotacheck(struct obd_export *exp, struct obd_quotactl *oqctl) +{ + struct client_obd *cli = &exp->exp_obd->u.cli; + struct ptlrpc_request *req; + struct obd_quotactl *body; + int size = sizeof(*body); + int rc; + ENTRY; + + req = ptlrpc_prep_req(class_exp2cliimp(exp), OST_QUOTACHECK, 1, &size, + NULL); + if (!req) + GOTO(out, rc = -ENOMEM); + + body = lustre_msg_buf(req->rq_reqmsg, 0, sizeof(*body)); + memcpy(body, oqctl, sizeof(*body)); + + req->rq_replen = lustre_msg_size(0, NULL); + + spin_lock(&cli->cl_qchk_lock); + cli->cl_qchk_stat = CL_QUOTACHECKING; + spin_unlock(&cli->cl_qchk_lock); + + rc = ptlrpc_queue_wait(req); + if (rc) { + spin_lock(&cli->cl_qchk_lock); + cli->cl_qchk_stat = 
rc; + spin_unlock(&cli->cl_qchk_lock); + } + out: + ptlrpc_req_finished(req); + RETURN (rc); +} + +static int osc_poll_quotacheck(struct obd_export *exp, + struct if_quotacheck *qchk) +{ + struct client_obd *cli = &exp->exp_obd->u.cli; + int stat; + ENTRY; + + spin_lock(&cli->cl_qchk_lock); + stat = cli->cl_qchk_stat; + spin_unlock(&cli->cl_qchk_lock); + + qchk->stat = stat; + if (stat == CL_QUOTACHECKING) { + qchk->stat = -ENODATA; + stat = 0; + } else if (qchk->stat) { + if (qchk->stat > CL_QUOTACHECKING) + qchk->stat = stat = -EINTR; + + strncpy(qchk->obd_type, "obdfilter", 10); + qchk->obd_uuid = cli->cl_import->imp_target_uuid; + } + RETURN(stat); +} + +static int osc_quotactl(struct obd_export *exp, struct obd_quotactl *oqctl) +{ + struct ptlrpc_request *req; + struct obd_quotactl *oqc; + int size = sizeof(*oqctl); + int rc; + ENTRY; + + req = ptlrpc_prep_req(class_exp2cliimp(exp), OST_QUOTACTL, 1, &size, + NULL); + if (!req) + GOTO(out, rc = -ENOMEM); + + memcpy(lustre_msg_buf(req->rq_reqmsg, 0, sizeof (*oqctl)), oqctl, size); + + req->rq_replen = lustre_msg_size(1, &size); + + rc = ptlrpc_queue_wait(req); + if (!rc) { + oqc = lustre_swab_repbuf(req, 0, sizeof (*oqc), + lustre_swab_obd_quotactl); + if (oqc == NULL) { + CERROR ("Can't unpack mds_body\n"); + GOTO(out, rc = -EPROTO); + } + + memcpy(oqctl, oqc, sizeof(*oqctl)); + } +out: + ptlrpc_req_finished(req); + RETURN (rc); +} + static int osc_iocontrol(unsigned int cmd, struct obd_export *exp, int len, void *karg, void *uarg) { @@ -2788,6 +2946,9 @@ static int osc_iocontrol(unsigned int cmd, struct obd_export *exp, int len, err = ptlrpc_set_import_active(obd->u.cli.cl_import, data->ioc_offset); GOTO(out, err); + case OBD_IOC_POLL_QUOTACHECK: + err = osc_poll_quotacheck(exp, (struct if_quotacheck *)karg); + GOTO(out, err); default: CDEBUG(D_INODE, "unrecognised ioctl %#x by %s\n", cmd, current->comm); GOTO(out, err = -ENOTTY); @@ -2909,7 +3070,7 @@ static int osc_set_info(struct obd_export *exp, obd_count 
keylen, rc = ptlrpc_queue_wait(req); ptlrpc_req_finished(req); - ctxt = llog_get_context(exp->exp_obd, LLOG_UNLINK_ORIG_CTXT); + ctxt = llog_get_context(exp->exp_obd, LLOG_MDS_OST_ORIG_CTXT); if (ctxt) { if (rc == 0) rc = llog_initiator_connect(ctxt); @@ -2930,21 +3091,21 @@ static struct llog_operations osc_size_repl_logops = { lop_cancel: llog_obd_repl_cancel }; -static struct llog_operations osc_unlink_orig_logops; +static struct llog_operations osc_mds_ost_orig_logops; static int osc_llog_init(struct obd_device *obd, struct obd_device *tgt, int count, struct llog_catid *catid) { int rc; ENTRY; - osc_unlink_orig_logops = llog_lvfs_ops; - osc_unlink_orig_logops.lop_setup = llog_obd_origin_setup; - osc_unlink_orig_logops.lop_cleanup = llog_obd_origin_cleanup; - osc_unlink_orig_logops.lop_add = llog_obd_origin_add; - osc_unlink_orig_logops.lop_connect = llog_origin_connect; + osc_mds_ost_orig_logops = llog_lvfs_ops; + osc_mds_ost_orig_logops.lop_setup = llog_obd_origin_setup; + osc_mds_ost_orig_logops.lop_cleanup = llog_obd_origin_cleanup; + osc_mds_ost_orig_logops.lop_add = llog_obd_origin_add; + osc_mds_ost_orig_logops.lop_connect = llog_origin_connect; - rc = llog_setup(obd, LLOG_UNLINK_ORIG_CTXT, tgt, count, - &catid->lci_logid, &osc_unlink_orig_logops); + rc = llog_setup(obd, LLOG_MDS_OST_ORIG_CTXT, tgt, count, + &catid->lci_logid, &osc_mds_ost_orig_logops); if (rc) RETURN(rc); @@ -2959,7 +3120,7 @@ static int osc_llog_finish(struct obd_device *obd, int count) int rc = 0, rc2 = 0; ENTRY; - ctxt = llog_get_context(obd, LLOG_UNLINK_ORIG_CTXT); + ctxt = llog_get_context(obd, LLOG_MDS_OST_ORIG_CTXT); if (ctxt) rc = llog_cleanup(ctxt); @@ -2972,7 +3133,6 @@ static int osc_llog_finish(struct obd_device *obd, int count) RETURN(rc); } - static int osc_disconnect(struct obd_export *exp) { struct obd_device *obd = class_exp2obd(exp); @@ -3079,6 +3239,7 @@ int osc_setup(struct obd_device *obd, obd_count len, void *buf) int osc_cleanup(struct obd_device *obd) { struct 
osc_creator *oscc = &obd->u.cli.cl_oscc; + struct client_obd *cli = &obd->u.cli; int rc; ptlrpc_lprocfs_unregister_obd(obd); @@ -3088,6 +3249,9 @@ int osc_cleanup(struct obd_device *obd) oscc->oscc_flags &= ~OSCC_FLAG_RECOVERING; oscc->oscc_flags |= OSCC_FLAG_EXITING; spin_unlock(&oscc->oscc_lock); + + /* free memory of osc quota cache */ + osc_qinfo_cleanup(cli); rc = client_obd_cleanup(obd); ptlrpcd_decref(); @@ -3112,6 +3276,7 @@ struct obd_ops osc_obd_ops = { .o_getattr = osc_getattr, .o_getattr_async = osc_getattr_async, .o_setattr = osc_setattr, + .o_setattr_async = osc_setattr_async, .o_brw = osc_brw, .o_brw_async = osc_brw_async, .o_prep_async_page = osc_prep_async_page, @@ -3134,6 +3299,8 @@ struct obd_ops osc_obd_ops = { .o_import_event = osc_import_event, .o_llog_init = osc_llog_init, .o_llog_finish = osc_llog_finish, + .o_quotacheck = osc_quotacheck, + .o_quotactl = osc_quotactl, }; #if (LINUX_VERSION_CODE < KERNEL_VERSION(2,5,0)) @@ -3194,6 +3361,8 @@ int __init osc_init(void) if (rc) class_unregister_type(LUSTRE_OSC_NAME); #endif + + rc = osc_qinfo_init(); RETURN(rc); } @@ -3201,6 +3370,7 @@ int __init osc_init(void) #ifdef __KERNEL__ static void /*__exit*/ osc_exit(void) { + osc_qinfo_exit(); #if (LINUX_VERSION_CODE < KERNEL_VERSION(2,5,0)) class_unregister_type(LUSTRE_SANOSC_NAME); #endif diff --git a/lustre/ost/ost_handler.c b/lustre/ost/ost_handler.c index 44d4467..12d61cc 100644 --- a/lustre/ost/ost_handler.c +++ b/lustre/ost/ost_handler.c @@ -45,8 +45,11 @@ #include #include #include +#include #include "ost_internal.h" +static struct quotacheck_info qchkinfo; + void oti_init(struct obd_trans_info *oti, struct ptlrpc_request *req) { if (oti == NULL) @@ -913,6 +916,131 @@ static int ost_filter_recovery_request(struct ptlrpc_request *req, } } +static int ost_quotacheck_callback(struct obd_export *exp, + struct obd_quotactl *oqctl) +{ + struct ptlrpc_request *req; + struct obd_quotactl *body; + int rc, size = sizeof(*oqctl); + + req = 
ptlrpc_prep_req(exp->exp_imp_reverse, OBD_QC_CALLBACK, + 1, &size, NULL); + if (!req) + RETURN(-ENOMEM); + + body = lustre_msg_buf(req->rq_reqmsg, 0, sizeof(*body)); + memcpy(body, oqctl, sizeof(*oqctl)); + + req->rq_replen = lustre_msg_size(0, NULL); + + rc = ptlrpc_queue_wait(req); + ptlrpc_req_finished(req); + + RETURN(rc); +} + +static int ost_quotacheck_thread(void *data) +{ + unsigned long flags; + struct quotacheck_info *qchki = data; + struct obd_export *exp; + struct obd_quotactl *oqctl; + struct filter_obd *filter; + int rc; + + lock_kernel(); + ptlrpc_daemonize(); + + SIGNAL_MASK_LOCK(current, flags); + sigfillset(&current->blocked); + RECALC_SIGPENDING; + SIGNAL_MASK_UNLOCK(current, flags); + + THREAD_NAME(current->comm, sizeof(current->comm) - 1, "%s", "quotacheck"); + unlock_kernel(); + + complete(&qchki->qi_starting); + + exp = qchki->qi_exp; + filter = &exp->exp_obd->u.filter; + oqctl = &qchki->qi_oqctl; + + obd_quotacheck(exp, oqctl); + rc = ost_quotacheck_callback(exp, oqctl); + + atomic_inc(&filter->fo_quotachecking); + + return rc; +} + +static int ost_quotacheck(struct ptlrpc_request *req) +{ + struct obd_device *obd = req->rq_export->exp_obd; + struct filter_obd *filter = &obd->u.filter; + struct obd_quotactl *oqctl; + int rc; + ENTRY; + + oqctl = lustre_swab_reqbuf(req, 0, sizeof(*oqctl), + lustre_swab_obd_quotactl); + if (oqctl == NULL) + GOTO(out, rc = -EPROTO); + + rc = lustre_pack_reply(req, 0, NULL, NULL); + if (rc) { + CERROR("ost: out of memory while packing quotacheck reply\n"); + GOTO(out, rc = -ENOMEM); + } + + if (!atomic_dec_and_test(&filter->fo_quotachecking)) { + atomic_inc(&filter->fo_quotachecking); + GOTO(out, rc = -EBUSY); + } + + init_completion(&qchkinfo.qi_starting); + qchkinfo.qi_exp = req->rq_export; + memcpy(&qchkinfo.qi_oqctl, oqctl, sizeof(*oqctl)); + + rc = kernel_thread(ost_quotacheck_thread, &qchkinfo, CLONE_VM|CLONE_FILES); + if (rc < 0) { + CERROR("%s: error starting ost_quotacheck_thread: %d\n", + obd->obd_name, rc);
+ atomic_inc(&filter->fo_quotachecking); + } else { + CDEBUG(D_INFO, "%s: ost_quotacheck_thread: %d\n", + obd->obd_name, rc); + wait_for_completion(&qchkinfo.qi_starting); + rc = 0; + } + + EXIT; +out: + return rc; +} + +static int ost_quotactl(struct ptlrpc_request *req) +{ + struct obd_quotactl *oqctl, *repoqc; + int rc, size = sizeof(*repoqc); + ENTRY; + + oqctl = lustre_swab_reqbuf(req, 0, sizeof(*oqctl), + lustre_swab_obd_quotactl); + if (oqctl == NULL) + GOTO(out, rc = -EPROTO); + + rc = lustre_pack_reply(req, 1, &size, NULL); + if (rc) + GOTO(out, rc); + + repoqc = lustre_msg_buf(req->rq_repmsg, 0, sizeof(*repoqc)); + memcpy(repoqc, oqctl, sizeof(*repoqc)); + + req->rq_status = obd_quotactl(req->rq_export, repoqc); +out: + RETURN(rc); +} + static int ost_handle(struct ptlrpc_request *req) { struct obd_trans_info trans_info = { 0, }; @@ -1047,6 +1175,16 @@ static int ost_handle(struct ptlrpc_request *req) DEBUG_REQ(D_INODE, req, "get_info"); rc = ost_get_info(req->rq_export, req); break; + case OST_QUOTACHECK: + CDEBUG(D_INODE, "quotacheck\n"); + OBD_FAIL_RETURN(OBD_FAIL_OST_QUOTACHECK_NET, 0); + rc = ost_quotacheck(req); + break; + case OST_QUOTACTL: + CDEBUG(D_INODE, "quotactl\n"); + OBD_FAIL_RETURN(OBD_FAIL_OST_QUOTACTL_NET, 0); + rc = ost_quotactl(req); + break; case OBD_PING: DEBUG_REQ(D_INODE, req, "ping"); rc = target_handle_ping(req); diff --git a/lustre/ptlrpc/llog_net.c b/lustre/ptlrpc/llog_net.c index fa2fc8e..51399a9 100644 --- a/lustre/ptlrpc/llog_net.c +++ b/lustre/ptlrpc/llog_net.c @@ -71,7 +71,7 @@ int llog_origin_connect(struct llog_ctxt *ctxt, int count, lgr->lgr_hdr.lrh_len = lgr->lgr_tail.lrt_len = sizeof(*lgr); lgr->lgr_hdr.lrh_type = LLOG_GEN_REC; lgr->lgr_gen = ctxt->loc_gen; - rc = llog_add(ctxt, &lgr->lgr_hdr, NULL, NULL, 1); + rc = llog_add(ctxt, &lgr->lgr_hdr, NULL, NULL, 1, NULL); OBD_FREE(lgr, sizeof(*lgr)); if (rc != 1) RETURN(rc); diff --git a/lustre/ptlrpc/lproc_ptlrpc.c b/lustre/ptlrpc/lproc_ptlrpc.c index 5437cb4..910613f 
100644 --- a/lustre/ptlrpc/lproc_ptlrpc.c +++ b/lustre/ptlrpc/lproc_ptlrpc.c @@ -53,6 +53,8 @@ struct ll_rpc_opcode { { OST_SAN_WRITE, "ost_san_write" }, { OST_SYNC, "ost_sync" }, { OST_SET_INFO, "ost_set_info" }, + { OST_QUOTACHECK, "ost_quotacheck" }, + { OST_QUOTACTL, "ost_quotactl" }, { MDS_GETATTR, "mds_getattr" }, { MDS_GETATTR_NAME, "mds_getattr_name" }, { MDS_CLOSE, "mds_close" }, @@ -67,6 +69,8 @@ struct ll_rpc_opcode { { MDS_SYNC, "mds_sync" }, { MDS_DONE_WRITING, "mds_done_writing" }, { MDS_SET_INFO, "mds_set_info" }, + { MDS_QUOTACHECK, "mds_quotacheck" }, + { MDS_QUOTACTL, "mds_quotactl" }, { LDLM_ENQUEUE, "ldlm_enqueue" }, { LDLM_CONVERT, "ldlm_convert" }, { LDLM_CANCEL, "ldlm_cancel" }, diff --git a/lustre/ptlrpc/pack_generic.c b/lustre/ptlrpc/pack_generic.c index 955fe2f..26c354e 100644 --- a/lustre/ptlrpc/pack_generic.c +++ b/lustre/ptlrpc/pack_generic.c @@ -462,6 +462,38 @@ void lustre_swab_mds_body (struct mds_body *b) __swab32s (&b->eadatasize); } +static void lustre_swab_obd_dqinfo (struct obd_dqinfo *i) +{ + __swab64s (&i->dqi_bgrace); + __swab64s (&i->dqi_igrace); + __swab32s (&i->dqi_flags); + __swab32s (&i->dqi_valid); +} + +static void lustre_swab_obd_dqblk (struct obd_dqblk *b) +{ + __swab64s (&b->dqb_ihardlimit); + __swab64s (&b->dqb_isoftlimit); + __swab64s (&b->dqb_curinodes); + __swab64s (&b->dqb_bhardlimit); + __swab64s (&b->dqb_bsoftlimit); + __swab64s (&b->dqb_curspace); + __swab64s (&b->dqb_btime); + __swab64s (&b->dqb_itime); + __swab32s (&b->dqb_valid); + __swab32s (&b->padding); +} + +void lustre_swab_obd_quotactl (struct obd_quotactl *q) +{ + __swab32s (&q->qc_cmd); + __swab32s (&q->qc_type); + __swab32s (&q->qc_id); + __swab32s (&q->qc_stat); + lustre_swab_obd_dqinfo (&q->qc_dqinfo); + lustre_swab_obd_dqblk (&q->qc_dqblk); +} + void lustre_swab_mds_rec_setattr (struct mds_rec_setattr *sa) { __swab32s (&sa->sa_opcode); @@ -681,6 +713,60 @@ void lustre_swab_ptlbd_rsp (struct ptlbd_rsp *r) __swab16s (&r->r_error_cnt); } +/* no 
one calls this */ +int llog_log_swabbed(struct llog_log_hdr *hdr) +{ + if (hdr->llh_hdr.lrh_type == __swab32(LLOG_HDR_MAGIC)) + return 1; + if (hdr->llh_hdr.lrh_type == LLOG_HDR_MAGIC) + return 0; + return -1; +} + +void lustre_swab_llogd_body (struct llogd_body *d) +{ + __swab64s (&d->lgd_logid.lgl_oid); + __swab64s (&d->lgd_logid.lgl_ogr); + __swab32s (&d->lgd_logid.lgl_ogen); + __swab32s (&d->lgd_ctxt_idx); + __swab32s (&d->lgd_llh_flags); + __swab32s (&d->lgd_index); + __swab32s (&d->lgd_saved_index); + __swab32s (&d->lgd_len); + __swab64s (&d->lgd_cur_offset); +} + +void lustre_swab_llog_hdr (struct llog_log_hdr *h) +{ + __swab32s (&h->llh_hdr.lrh_index); + __swab32s (&h->llh_hdr.lrh_len); + __swab32s (&h->llh_hdr.lrh_type); + __swab64s (&h->llh_timestamp); + __swab32s (&h->llh_count); + __swab32s (&h->llh_bitmap_offset); + __swab32s (&h->llh_flags); + __swab32s (&h->llh_tail.lrt_index); + __swab32s (&h->llh_tail.lrt_len); +} + +void lustre_swab_llogd_conn_body (struct llogd_conn_body *d) +{ + __swab64s (&d->lgdc_gen.mnt_cnt); + __swab64s (&d->lgdc_gen.conn_cnt); + __swab64s (&d->lgdc_logid.lgl_oid); + __swab64s (&d->lgdc_logid.lgl_ogr); + __swab32s (&d->lgdc_logid.lgl_ogen); + __swab32s (&d->lgdc_ctxt_idx); +} + +void lustre_swab_qdata(struct qunit_data *d) +{ + __swab32s (&d->qd_id); + __swab32s (&d->qd_type); + __swab32s (&d->qd_count); + __swab32s (&d->qd_isblk); +} + void lustre_assert_wire_constants(void) { /* Wire protocol assertions generated by 'wirecheck' @@ -745,7 +831,11 @@ void lustre_assert_wire_constants(void) (long long)OST_SAN_WRITE); LASSERTF(OST_SYNC == 16, " found %lld\n", (long long)OST_SYNC); - LASSERTF(OST_LAST_OPC == 18, " found %lld\n", + LASSERTF(OST_QUOTACHECK == 18, " found %lld\n", + (long long)OST_QUOTACHECK); + LASSERTF(OST_QUOTACTL == 19, " found %lld\n", + (long long)OST_QUOTACTL); + LASSERTF(OST_LAST_OPC == 20, " found %lld\n", (long long)OST_LAST_OPC); LASSERTF(OBD_OBJECT_EOF == 0xffffffffffffffffULL," found %lld\n", (long 
long)OBD_OBJECT_EOF); @@ -779,7 +869,11 @@ void lustre_assert_wire_constants(void) (long long)MDS_DONE_WRITING); LASSERTF(MDS_SET_INFO == 46, " found %lld\n", (long long)MDS_SET_INFO); - LASSERTF(MDS_LAST_OPC == 47, " found %lld\n", + LASSERTF(MDS_QUOTACHECK == 47, " found %lld\n", + (long long)MDS_QUOTACHECK); + LASSERTF(MDS_QUOTACTL == 48, " found %lld\n", + (long long)MDS_QUOTACTL); + LASSERTF(MDS_LAST_OPC == 49, " found %lld\n", (long long)MDS_LAST_OPC); LASSERTF(REINT_SETATTR == 1, " found %lld\n", (long long)REINT_SETATTR); @@ -861,7 +955,9 @@ void lustre_assert_wire_constants(void) (long long)OBD_PING); LASSERTF(OBD_LOG_CANCEL == 401, " found %lld\n", (long long)OBD_LOG_CANCEL); - LASSERTF(OBD_LAST_OPC == 402, " found %lld\n", + LASSERTF(OBD_QC_CALLBACK == 402, " found %lld\n", + (long long)OBD_QC_CALLBACK); + LASSERTF(OBD_LAST_OPC == 403, " found %lld\n", (long long)OBD_LAST_OPC); /* Sizes and Offsets */ @@ -1847,6 +1943,8 @@ void lustre_assert_wire_constants(void) (long long)OST_RAID1_REC); LASSERTF(MDS_UNLINK_REC == 274801668, " found %lld\n", (long long)MDS_UNLINK_REC); + LASSERTF(MDS_SETATTR_REC == 274801665, " found %lld\n", + (long long)MDS_SETATTR_REC); LASSERTF(OBD_CFG_REC == 274857984, " found %lld\n", (long long)OBD_CFG_REC); LASSERTF(PTL_CFG_REC == 274923520, " found %lld\n", diff --git a/lustre/ptlrpc/ptlrpc_module.c b/lustre/ptlrpc/ptlrpc_module.c index b4fd554..4629b71 100644 --- a/lustre/ptlrpc/ptlrpc_module.c +++ b/lustre/ptlrpc/ptlrpc_module.c @@ -157,6 +157,7 @@ EXPORT_SYMBOL(lustre_swab_ost_last_id); EXPORT_SYMBOL(lustre_swab_ost_lvb); EXPORT_SYMBOL(lustre_swab_mds_status_req); EXPORT_SYMBOL(lustre_swab_mds_body); +EXPORT_SYMBOL(lustre_swab_obd_quotactl); EXPORT_SYMBOL(lustre_swab_mds_rec_setattr); EXPORT_SYMBOL(lustre_swab_mds_rec_create); EXPORT_SYMBOL(lustre_swab_mds_rec_link); @@ -175,6 +176,7 @@ EXPORT_SYMBOL(lustre_swab_ldlm_reply); EXPORT_SYMBOL(lustre_swab_ptlbd_op); EXPORT_SYMBOL(lustre_swab_ptlbd_niob); 
EXPORT_SYMBOL(lustre_swab_ptlbd_rsp); +EXPORT_SYMBOL(lustre_swab_qdata); /* recover.c */ EXPORT_SYMBOL(ptlrpc_run_recovery_over_upcall); diff --git a/lustre/tests/lov.sh b/lustre/tests/lov.sh index e9ddfa7..ca6683b 100755 --- a/lustre/tests/lov.sh +++ b/lustre/tests/lov.sh @@ -16,7 +16,7 @@ MOUNT=${MOUNT:-/mnt/lustre} MOUNT2=${MOUNT2:-${MOUNT}2} NETTYPE=${NETTYPE:-tcp} -OSTCOUNT=${OSTCOUNT:-5} +OSTCOUNT=${OSTCOUNT:-2} # OSTDEVN will still override the device for OST N OSTSIZE=${OSTSIZE:-150000} diff --git a/lustre/tests/quota_sanity.sh b/lustre/tests/quota_sanity.sh new file mode 100644 index 0000000..9261668 --- /dev/null +++ b/lustre/tests/quota_sanity.sh @@ -0,0 +1,310 @@ +#!/bin/bash + +set -e +#set -vx + +SRCDIR=`dirname $0` +export PATH=$PWD/$SRCDIR:$SRCDIR:$SRCDIR/../utils:$PATH:/sbin +. $SRCDIR/test-framework.sh + +LFS=${LFS:-lfs} +LCTL=${LCTL:-lctl} +USER="quota_usr" +TSTID=${TSTID:-60000} +RUNAS=${RUNAS:-"runas -u $TSTID"} +BLK_SZ=1024 +BUNIT_SZ=10 # 10 quota blocks +BTUNE_SZ=5 # 5 quota blocks +IUNIT_SZ=10 # 10 files +ITUNE_SZ=5 # 5 files + +MOUNT="`cat /proc/mounts | grep "lustre" | awk '{print $2}'`" +if [ -z "$MOUNT" ]; then + echo "ERROR: lustre not mounted, quit test!" 
+ exit 1; +fi +OSTCOUNT=`cat /proc/fs/lustre/lov/*/activeobd | head -n 1` +TSTDIR="$MOUNT/quota_dir" + +# set_blk_tunables(bunit_sz, btune_sz) +set_blk_tunables() { + # set bunit and btune size on all obdfilters + for i in `ls /proc/fs/lustre/obdfilter/*/quota_btune_sz`; do + echo $(($2 * $BLK_SZ)) > $i + done + for i in `ls /proc/fs/lustre/obdfilter/*/quota_bunit_sz`; do + echo $(($1 * $BLK_SZ)) > $i + done; + # set bunit and btune size on mds + for i in `ls /proc/fs/lustre/mds/mds*/quota_btune_sz`; do + echo $(($2 * $BLK_SZ)) > $i + done + for i in `ls /proc/fs/lustre/mds/mds*/quota_bunit_sz`; do + echo $(($1 * $BLK_SZ)) > $i + done +} + +# set_file_tunables(iunit_sz, itune_sz) +set_file_tunables() { + # set iunit and itune size on all obdfilters + for i in `ls /proc/fs/lustre/obdfilter/*/quota_itune_sz`; do + echo $2 > $i + done + for i in `ls /proc/fs/lustre/obdfilter/*/quota_iunit_sz`; do + echo $1 > $i + done; + # set iunit and itune size on mds + for i in `ls /proc/fs/lustre/mds/mds*/quota_itune_sz`; do + echo $2 > $i + done + for i in `ls /proc/fs/lustre/mds/mds*/quota_iunit_sz`; do + echo $1 > $i + done +} + +prepare_test() { + # create test group + GRP="`cat /etc/group | grep "$USER" | awk -F: '{print $1}'`" + if [ -z "$GRP" ]; then + groupadd -g $TSTID "$USER" + fi + TSTID="`cat /etc/group | grep "$USER" | awk -F: '{print $3}'`" + + # create test user + USR="`cat /etc/passwd | grep "$USER" | awk -F: '{print $1}'`" + if [ -z "$USR" ]; then + useradd -u $TSTID -g $TSTID -d /tmp "$USER" + fi + + RUNAS="runas -u $TSTID" + # set block tunables + + set_blk_tunables $BUNIT_SZ $BTUNE_SZ + # set file tunaables + set_file_tunables $IUNIT_SZ $ITUNE_SZ + + [ -d $TSTDIR ] || mkdir $TSTDIR + chmod 777 $TSTDIR +} + +cleanup_test() { + # delete test user and group + userdel "$USER" + groupdel "$USER" + + # restore block tunables to default size + set_blk_tunables $((1024 * 100)) $((1024 * 50)) + # restore file tunables to default size + set_file_tunables 5000 2500 + + 
rm -fr $TSTDIR +} + +# set quota +test_1() { + echo "== Enable quota" + $LFS quotaoff -ug $MOUNT + $LFS quotacheck -ug $MOUNT + return 0 +} + +# block hard limit (normal use and out of quota) +test_2() { + echo "== Block hard limit" + LIMIT=$(( $BUNIT_SZ * $(($OSTCOUNT + 1)) * 10)) # 10 bunits each sever + TESTFILE="$TSTDIR/quota_tst20" + + echo " User quota (limit: $LIMIT bytes)" + $LFS setquota -u $USER 0 $LIMIT 0 0 $MOUNT + + $RUNAS touch $TESTFILE >/dev/null 2>&1 + + echo " Write ..." + $RUNAS dd if=/dev/zero of=$TESTFILE bs=$BLK_SZ count=$(($LIMIT/2)) > /dev/null 2>&1 || error "(usr) write failure, but expect success" + echo " Done" + echo " Write out of block quota ..." + $RUNAS dd if=/dev/zero of=$TESTFILE bs=$BLK_SZ count=$(($LIMIT/2)) seek=$(($LIMIT/2)) > /dev/null 2>&1 + # flush cache, ensure noquota flag is setted on client + sync; sleep 1; sync; + $RUNAS dd if=/dev/zero of=$TESTFILE bs=$BLK_SZ count=$BUNIT_SZ seek=$LIMIT > /dev/null 2>&1 && error "(usr) write success, but expect EDQUOT" + echo " EDQUOT" + + rm -f $TESTFILE + + echo " Group quota (limit: $LIMIT bytes)" + $LFS setquota -u $USER 0 0 0 0 $MOUNT # clear user limit + $LFS setquota -g $USER 0 $LIMIT 0 0 $MOUNT + TESTFILE="$TSTDIR/quota_tst21" + + $RUNAS touch $TESTFILE >/dev/null 2>&1 + + echo " Write ..." + $RUNAS dd if=/dev/zero of=$TESTFILE bs=$BLK_SZ count=$(($LIMIT/2)) > /dev/null 2>&1 || error "(grp) write failure, but expect success" + echo " Done" + echo " Write out of block quota ..." 
+ $RUNAS dd if=/dev/zero of=$TESTFILE bs=$BLK_SZ count=$(($LIMIT/2)) seek=$(($LIMIT/2)) > /dev/null 2>&1 + sync; sleep 1; sync; + $RUNAS dd if=/dev/zero of=$TESTFILE bs=$BLK_SZ count=$BUNIT_SZ seek=$LIMIT > /dev/null 2>&1 && error "(grp) write success, but expect EDQUOT" + echo " EDQUOT" + + # cleanup + rm -f $TESTFILE + $LFS setquota -g $USER 0 0 0 0 $MOUNT + return 0 +} + +# file hard limit (normal use and out of quota) +test_3() { + echo "== File hard limit" + LIMIT=$(($IUNIT_SZ * 10)) # 10 iunits on mds + TESTFILE="$TSTDIR/quota_tst30" + + echo " User quota (limit: $LIMIT files)" + $LFS setquota -u $USER 0 0 0 $LIMIT $MOUNT + + echo " Create $LIMIT files ..." + for i in `seq ${LIMIT}`; do + $RUNAS touch ${TESTFILE}_$i > /dev/null 2>&1 || error "(usr) touch failure, but except success" + done + echo " Done" + echo " Create out of file quota ..." + $RUNAS touch ${TESTFILE}_xxx > /dev/null 2>&1 && error "(usr) touch success, but expect EDQUOT" + echo " EDQUOT" + + for i in `seq ${LIMIT}`; do + rm -f ${TESTFILE}_$i + done + + echo " Group quota (limit: $LIMIT files)" + $LFS setquota -u $USER 0 0 0 0 $MOUNT # clear user limit + $LFS setquota -g $USER 0 0 0 $LIMIT $MOUNT + TESTFILE="$TSTDIR/quota_tst31" + + echo " Create $LIMIT files ..." + for i in `seq ${LIMIT}`; do + $RUNAS touch ${TESTFILE}_$i > /dev/null 2>&1 || error "(grp) touch failure, but expect success" + done + echo " Done" + echo " Create out of file quota ..." 
+ $RUNAS touch ${TESTFILE}_xxx > /dev/null 2>&1 && error "(grp) touch success, but expect EDQUOT" + echo " EDQUOT" + + # cleanup + for i in `seq ${LIMIT}`; do + rm -f ${TESTFILE}_$i + done + $LFS setquota -g $USER 0 0 0 0 $MOUNT + return 0 +} + +# block soft limit (start timer, timer goes off, stop timer) +test_4() { + echo "== Block soft limit" + echo " ** skipped" + return 0 +} + +# file soft limit (start timer, timer goes off, stop timer) +test_5() { + echo "== File soft limit" + echo " ** skipped" + return 0 +} + +# chown & chgrp (chown & chgrp successfully even out of block/file quota) +test_6() { + echo "== Chown/Chgrp ignore quota" + BLIMIT=$(( $BUNIT_SZ * $((OSTCOUNT + 1)) * 10)) # 10 bunits on each server + ILIMIT=$(( $IUNIT_SZ * 10 )) # 10 iunits on mds + + echo " Set quota limit (0 $BLIMIT 0 $ILIMIT) for $USER.$USER" + $LFS setquota -u $USER 0 $BLIMIT 0 $ILIMIT $MOUNT + $LFS setquota -g $USER 0 $BLIMIT 0 $ILIMIT $MOUNT + + echo " Create more than $ILIMIT files and alloc more than $BLIMIT blocks ..." + for i in `seq $(($ILIMIT + 1))`; do + touch $TSTDIR/quota_tst60_$i > /dev/null 2>&1 || error "touch failure, expect success" + done + dd if=/dev/zero of=$TSTDIR/quota_tst60_1 bs=$BLK_SZ count=$(($BLIMIT+1)) > /dev/null 2>&1 || error "write failure, expect success" + + echo " Chown files to $USER.$USER ..." + for i in `seq $(($ILIMIT + 1))`; do + chown $USER.$USER $TSTDIR/quota_tst60_$i > /dev/null 2>&1 || error "chown failure, but expect success" + done + + # cleanup + for i in `seq $(($ILIMIT + 1))`; do + rm -f $TSTDIR/quota_tst60_$i + done + $LFS setquota -u $USER 0 0 0 0 $MOUNT + $LFS setquota -g $USER 0 0 0 0 $MOUNT + return 0 +} + +# block quota acquire & release +test_7() { + echo "== Block quota acqurie / release" + + if [ $OSTCOUNT -lt 2 ]; then + echo "WARN: too few osts, skip this test." 
+ return 0; + fi + + LIMIT=$(($BUNIT_SZ * $(($OSTCOUNT + 1)) * 10)) # 10 bunits per server + FILEA="$TSTDIR/quota_tst70_a" + FILEB="$TSTDIR/quota_tst70_b" + + echo " Set block limit $LIMIT bytes to $USER.$USER" + $LFS setquota -u $USER 0 $LIMIT 0 0 $MOUNT + $LFS setquota -g $USER 0 $LIMIT 0 0 $MOUNT + + echo " Create filea on OST0 and fileb on OST1" + $LFS setstripe $FILEA 65536 0 1 + $LFS setstripe $FILEB 65536 1 1 + chown $USER.$USER $FILEA + chown $USER.$USER $FILEB + + echo " Exceed quota limit ..." + $RUNAS dd if=/dev/zero of=$FILEB bs=$BLK_SZ count=$(($LIMIT - $BUNIT_SZ * $OSTCOUNT)) >/dev/null 2>&1 || error "write fileb failure, but expect success" + sync; sleep 1; sync; + $RUNAS dd if=/dev/zero of=$FILEB bs=$BLK_SZ seek=$LIMIT count=$BUNIT_SZ >/dev/null 2>&1 && error "write fileb success, but expect EDQUOT" + sync; sleep 1; sync; + echo " Write to OST0 return EDQUOT" + # this write of OST0 is cache write, will success + $RUNAS dd if=/dev/zero of=$FILEA bs=$BLK_SZ count=$(($BUNIT_SZ * 2)) >/dev/null 2>&1 || error "write filea failure, but expect success" + sync; sleep 1; sync; + $RUNAS dd if=/dev/zero of=$FILEA bs=$BLK_SZ count=$(($BUNIT_SZ * 2)) seek=$(($BUNIT_SZ *2)) >/dev/null 2>&1 && error "write filea success, but expect EDQUOT" + echo " EDQUOT" + + echo " Remove fileb to let OST1 release quota" + rm -f $FILEB + + echo " Write to OST0" + $RUNAS dd if=/dev/zero of=$FILEA bs=$BLK_SZ count=$(($LIMIT - $BUNIT_SZ * $OSTCOUNT)) >/dev/null 2>&1 || error "write filea failure, expect success" + echo " Done" + + # cleanup + rm -f $FILEA + $LFS setquota -u $USER 0 0 0 0 $MOUNT + $LFS setquota -g $USER 0 0 0 0 $MOUNT + return 0 +} + +# turn off quota +test_8() +{ + echo "=== Turn off quota" + $LFS quotaoff $MOUNT + return 0 +} + +prepare_test + +# run all tests +for j in `seq 8`; do + test_$j + echo "== Done" + echo " " +done + +cleanup_test diff --git a/lustre/tests/replay-single.sh b/lustre/tests/replay-single.sh index ef76583..98a7c03 100755 --- 
a/lustre/tests/replay-single.sh +++ b/lustre/tests/replay-single.sh @@ -1002,5 +1002,33 @@ test_56() { } run_test 56 "don't replay a symlink open request (3440)" +#recovery one mds-ost setattr from llog +test_57() { +#define OBD_FAIL_MDS_OST_SETATTR 0x12c + do_facet mds "sysctl -w lustre.fail_loc=0x8000012c" + touch $DIR/$tfile + replay_barrier mds + fail mds + sleep 1 + $CHECKSTAT -t file $DIR/$tfile || return 1 + do_facet mds "sysctl -w lustre.fail_loc=0x0" + rm $DIR/$tfile +} +run_test 57 "test recovery from llog for setattr op" + +#recovery many mds-ost setattr from llog +test_58() { +#define OBD_FAIL_MDS_OST_SETATTR 0x12c + do_facet mds "sysctl -w lustre.fail_loc=0x8000012c" + createmany -o $DIR/$tfile-%d 30000 + replay_barrier mds + fail mds + sleep 2 + $CHECKSTAT -t file $DIR/$tfile-* || return 1 + do_facet mds "sysctl -w lustre.fail_loc=0x0" + rm -f $DIR/$tfile-* +} +run_test 58 "test recovery from llog for setattr op (test llog_gen_rec)" + equals_msg test complete, cleaning up $CLEANUP diff --git a/lustre/tests/run-quotacheck.sh b/lustre/tests/run-quotacheck.sh new file mode 100644 index 0000000..85c5b79 --- /dev/null +++ b/lustre/tests/run-quotacheck.sh @@ -0,0 +1,30 @@ +#!/bin/bash +PATH=`dirname $0`:`dirname $0`/../utils:$PATH +TMP=${TMP:-/tmp} + +OBD=${1:-obdfilter} +TARGET=`ls /proc/fs/lustre/$OBD | grep -v num_refs | head -n 1` +[ -z "$TARGET" ] && echo "no TARGET available, skipping quotacheck test" && exit 0 + +insmod ../lvfs/quotacheck_test.ko || exit 1 +lctl modules > $TMP/ogdb-`hostname` +echo "NOW reload debugging syms.." + +RC=0 +lctl < $TMP/ogdb-`hostname` +echo "NOW reload debugging syms.." + +RC=0 +lctl < $TMP/ogdb-`hostname` +echo "NOW reload debugging syms.." + +RC=0 +lctl < #include #include +#include +#include +#include + #include #include @@ -40,6 +44,15 @@ #include "parser.h" #include "obdctl.h" +/* FIXME: Q_SYNC ... 
commands defined in linux/quota.h seems broken, + * so define new commands with the value in kernel */ +#define LUSTRE_Q_QUOTAON 0x800002 /* turn quotas on */ +#define LUSTRE_Q_QUOTAOFF 0x800003 /* turn quotas off */ +#define LUSTRE_Q_GETINFO 0x800005 /* get information about quota files */ +#define LUSTRE_Q_SETINFO 0x800006 /* set information about quota files */ +#define LUSTRE_Q_GETQUOTA 0x800007 /* get user quota structure */ +#define LUSTRE_Q_SETQUOTA 0x800008 /* set user quota structure */ + unsigned int portal_subsystem_debug = 0; /* all functions */ @@ -49,6 +62,12 @@ static int lfs_getstripe(int argc, char **argv); static int lfs_osts(int argc, char **argv); static int lfs_check(int argc, char **argv); static int lfs_catinfo(int argc, char **argv); +static int lfs_quotachog(int argc, char **argv); +static int lfs_quotacheck(int argc, char **argv); +static int lfs_quotaon(int argc, char **argv); +static int lfs_quotaoff(int argc, char **argv); +static int lfs_setquota(int argc, char **argv); +static int lfs_quota(int argc, char **argv); /* all avaialable commands */ command_t cmdlist[] = { @@ -79,6 +98,24 @@ command_t cmdlist[] = { "\tkeywords are one of followings: config, deletions.\n" "\tnode name must be provided when use keyword config."}, {"osts", lfs_osts, 0, "osts"}, + {"quotachog",lfs_quotachog, 0, + "Change all files owner or group in specified filesystem.\n" + "usage: quotachog [-i] \n" + "\t-i: ignore error if file is not exist\n"}, + {"quotacheck", lfs_quotacheck, 0, + "Scan the specified filesystem for disk usage, and create,\n" + "or update quota files.\n" + "usage: quotacheck [ -ug ] "}, + {"quotaon", lfs_quotaon, 0, "Turn filesystem quotas on.\n" + "usage: quotaon [ -ugf ] "}, + {"quotaoff", lfs_quotaoff, 0, "Turn filesystem quotas off.\n" + "usage: quotaoff [ -ug ] "}, + {"setquota", lfs_setquota, 0, "Set filesystem quotas.\n" + "usage: setquota [ -u | -g ] \n" + " setquota -t [ -u | -g ] "}, + {"quota", lfs_quota, 0, "Display disk usage 
and limits.\n" + "usage: quota -t [ -u |-g ] \n" + " quota [ -o obd_uuid ] [ -u | -g ] [name] "}, {"help", Parser_help, 0, "help"}, {"exit", Parser_quit, 0, "quit"}, {"quit", Parser_quit, 0, "quit"}, @@ -386,6 +423,531 @@ static int lfs_catinfo(int argc, char **argv) return rc; } + +static int lfs_quotachog(int argc, char **argv) +{ + + int c,rc; + int flag = 0; + + while ((c = getopt(argc, argv, "i")) != -1) { + switch (c) { + case 'i': + flag++; + break; + default: + fprintf(stderr, "error: %s: option '-%c' unrecognized\n", argv[0], c); + return CMD_HELP; + } + } + if (optind == argc) + return CMD_HELP; + rc = llapi_quotachog(argv[optind], flag); + if(rc) + fprintf(stderr,"error: change file owner/group failed.\n"); + return rc; +} + + +static int lfs_quotacheck(int argc, char **argv) +{ + int c, check_type = 0; + char *mnt; + struct if_quotacheck qchk; + struct if_quotactl qctl; + char *obd_type = qchk.obd_type; + char *obd_uuid = qchk.obd_uuid.uuid; + int rc; + + memset(&qchk, 0, sizeof(qchk)); + + optind = 0; + while ((c = getopt(argc, argv, "ug")) != -1) { + switch (c) { + case 'u': + check_type |= 0x01; + break; + case 'g': + check_type |= 0x02; + break; + default: + fprintf(stderr, "error: %s: option '-%c' unrecognized\n", argv[0], c); + return CMD_HELP; + } + } + + if (check_type) + check_type--; + + if (argc == optind) + return CMD_HELP; + + mnt = argv[optind]; + + memset(&qctl, 0, sizeof(qctl)); + qctl.qc_cmd = LUSTRE_Q_QUOTAOFF; + qctl.qc_id = QFMT_LDISKFS; + qctl.qc_type = check_type; + llapi_quotactl(mnt, &qctl); + + rc = llapi_quotacheck(mnt, check_type); + if (rc) { + fprintf(stderr, "quotacheck failed: %s\n", strerror(errno)); + return rc; + } + + rc = llapi_poll_quotacheck(mnt, &qchk); + if (rc) { + if (*obd_type) + fprintf(stderr, "%s %s ", obd_type, obd_uuid); + fprintf(stderr, "quota check failed: %s\n", strerror(errno)); + return rc; + } + + memset(&qctl, 0, sizeof(qctl)); + qctl.qc_cmd = LUSTRE_Q_QUOTAON; + qctl.qc_id = QFMT_LDISKFS; + 
qctl.qc_type = check_type;
+        rc = llapi_quotactl(mnt, &qctl);
+        if (rc) {
+                if (*obd_type)
+                        fprintf(stderr, "%s %s ",
+                                qctl.obd_type, qctl.obd_uuid.uuid);
+                fprintf(stderr, "%s turn on quota failed: %s\n",
+                        argv[0], strerror(errno));
+                return rc;
+        }
+
+        return 0;
+}
+
+/*
+ * "quotaon" command: quotaon [-u] [-g] [-f] <mountpoint>
+ *
+ * Sends LUSTRE_Q_QUOTAON (with QFMT_LDISKFS carried in qc_id) for the
+ * selected quota type through llapi_quotactl().
+ * Returns 0 on success, CMD_HELP on a usage error, otherwise the
+ * llapi_quotactl() result after printing a diagnostic.
+ */
+static int lfs_quotaon(int argc, char **argv)
+{
+        int c;
+        char *mnt;
+        struct if_quotactl qctl;
+        char *obd_type = qctl.obd_type;
+        char *obd_uuid = qctl.obd_uuid.uuid;
+        int rc;
+
+        memset(&qctl, 0, sizeof(qctl));
+        qctl.qc_cmd = LUSTRE_Q_QUOTAON;
+        qctl.qc_id = QFMT_LDISKFS;
+
+        /* restart option scanning from the beginning of argv */
+        optind = 0;
+        while ((c = getopt(argc, argv, "ugf")) != -1) {
+                switch (c) {
+                case 'u':
+                        qctl.qc_type |= 0x01;
+                        break;
+                case 'g':
+                        qctl.qc_type |= 0x02;
+                        break;
+                case 'f':
+                        /* NOTE(review): -f turns this request into
+                         * LUSTRE_Q_QUOTAOFF - confirm this is intended */
+                        qctl.qc_cmd = LUSTRE_Q_QUOTAOFF;
+                        break;
+                default:
+                        /* getopt() returns '?' here; the offending option
+                         * character is in optopt */
+                        fprintf(stderr, "error: %s: option '-%c' unrecognized\n", argv[0], optopt);
+                        return CMD_HELP;
+                }
+        }
+
+        /* turn the -u/-g bit mask (1, 2, 3) into a quota type (0, 1, 2);
+         * with neither option the type stays 0 */
+        if (qctl.qc_type)
+                qctl.qc_type--;
+
+        if (argc == optind)
+                return CMD_HELP;
+
+        mnt = argv[optind];
+
+        rc = llapi_quotactl(mnt, &qctl);
+        if (rc) {
+                if (*obd_type)
+                        fprintf(stderr, "%s %s ", obd_type, obd_uuid);
+                fprintf(stderr, "%s failed: %s\n", argv[0], strerror(errno));
+                return rc;
+        }
+
+        return 0;
+}
+
+/*
+ * "quotaoff" command: quotaoff [-u] [-g] <mountpoint>
+ *
+ * Sends LUSTRE_Q_QUOTAOFF for the selected quota type through
+ * llapi_quotactl().
+ * Returns 0 on success, CMD_HELP on a usage error, otherwise the
+ * llapi_quotactl() result after printing a diagnostic.
+ */
+static int lfs_quotaoff(int argc, char **argv)
+{
+        int c;
+        char *mnt;
+        struct if_quotactl qctl;
+        char *obd_type = qctl.obd_type;
+        char *obd_uuid = qctl.obd_uuid.uuid;
+        int rc;
+
+        memset(&qctl, 0, sizeof(qctl));
+        qctl.qc_cmd = LUSTRE_Q_QUOTAOFF;
+
+        /* restart option scanning from the beginning of argv */
+        optind = 0;
+        while ((c = getopt(argc, argv, "ug")) != -1) {
+                switch (c) {
+                case 'u':
+                        qctl.qc_type |= 0x01;
+                        break;
+                case 'g':
+                        qctl.qc_type |= 0x02;
+                        break;
+                default:
+                        /* the rejected option character is in optopt */
+                        fprintf(stderr, "error: %s: option '-%c' unrecognized\n", argv[0], optopt);
+                        return CMD_HELP;
+                }
+        }
+
+        /* bit mask (1, 2, 3) -> quota type (0, 1, 2) */
+        if (qctl.qc_type)
+                qctl.qc_type--;
+
+        if (argc == optind)
+                return CMD_HELP;
+
+        mnt = argv[optind];
+
+        rc = llapi_quotactl(mnt, &qctl);
+        if (rc) {
+                if (*obd_type)
+                        fprintf(stderr, "%s %s ", obd_type, obd_uuid);
+                fprintf(stderr, "quotaoff failed: %s\n", strerror(errno));
+                return rc;
+        }
+
+        return 0;
+}
+
+/*
+ * Resolve a user name (type == USRQUOTA) or a group name (any other type)
+ * to its numeric id, stored in *id.
+ * Returns 0 on success, -1 on failure with errno set (ENOENT when the
+ * lookup reported no error of its own).
+ */
+static int name2id(unsigned int *id, char *name, int type)
+{
+        /* a failed lookup of a missing entry leaves errno unchanged, so
+         * clear it first; otherwise a stale value would be reported */
+        errno = 0;
+
+        if (type == USRQUOTA) {
+                struct passwd *entry;
+
+                if (!(entry = getpwnam(name))) {
+                        if (!errno)
+                                errno = ENOENT;
+                        return -1;
+                }
+
+                *id = entry->pw_uid;
+        } else {
+                struct group *entry;
+
+                if (!(entry = getgrnam(name))) {
+                        if (!errno)
+                                errno = ENOENT;
+                        return -1;
+                }
+
+                *id = entry->gr_gid;
+        }
+
+        return 0;
+}
+
+/*
+ * Resolve a uid (type == USRQUOTA) or a gid (any other type) to its name.
+ * On success *name points at the pw_name/gr_name member of the entry
+ * returned by getpwuid()/getgrgid(); the caller must not free it.
+ * Returns 0 on success, -1 on failure with errno set (ENOENT when the
+ * lookup reported no error of its own).
+ */
+static int id2name(char **name, unsigned int id, int type)
+{
+        /* see name2id(): make sure errno reflects this lookup only */
+        errno = 0;
+
+        if (type == USRQUOTA) {
+                struct passwd *entry;
+
+                if (!(entry = getpwuid(id))) {
+                        if (!errno)
+                                errno = ENOENT;
+                        return -1;
+                }
+
+                *name = entry->pw_name;
+        } else {
+                struct group *entry;
+
+                if (!(entry = getgrgid(id))) {
+                        if (!errno)
+                                errno = ENOENT;
+                        return -1;
+                }
+
+                *name = entry->gr_name;
+        }
+
+        return 0;
+}
+
+/*
+ * Parse the string @str as an unsigned integer (base chosen by strtoull:
+ * decimal, 0x hex or leading-0 octal) and store it in @nr.  If the string
+ * is empty or has trailing garbage, print an error naming @msg and make
+ * the *calling function* return CMD_HELP.
+ *
+ * @str is evaluated exactly once: callers pass argv[optind++], so a second
+ * expansion would report the wrong argument and skip one.
+ */
+#define ARG2INT(nr, str, msg)                                           \
+do {                                                                    \
+        char *arg_ = (str);                                             \
+        char *endp_;                                                    \
+        (nr) = strtoull(arg_, &endp_, 0);                               \
+        if (endp_ == arg_ || *endp_) {                                  \
+                fprintf(stderr, "error: bad %s: %s\n", (msg), arg_);    \
+                return CMD_HELP;                                        \
+        }                                                               \
+} while (0)
+
+/*
+ * "setquota" command:
+ *   setquota [-u|-g] <name> <block-softlimit> <block-hardlimit>
+ *            <inode-softlimit> <inode-hardlimit> <mountpoint>
+ *   setquota -t [-u|-g] <block-grace> <inode-grace> <mountpoint>
+ *
+ * Sends LUSTRE_Q_SETQUOTA, or LUSTRE_Q_SETINFO with -t, through
+ * llapi_quotactl().
+ * Returns 0 on success, CMD_HELP on a usage error, otherwise the
+ * llapi_quotactl() result after printing a diagnostic.
+ */
+int lfs_setquota(int argc, char **argv)
+{
+        int c;
+        char *mnt;
+        struct if_quotactl qctl;
+        char *obd_type = qctl.obd_type;
+        char *obd_uuid = qctl.obd_uuid.uuid;
+        int rc;
+
+        memset(&qctl, 0, sizeof(qctl));
+        qctl.qc_cmd = LUSTRE_Q_SETQUOTA;
+
+        /* restart option scanning from the beginning of argv */
+        optind = 0;
+        while ((c = getopt(argc, argv, "ugt")) != -1) {
+                switch (c) {
+                case 'u':
+                        qctl.qc_type |= 0x01;
+                        break;
+                case 'g':
+                        qctl.qc_type |= 0x02;
+                        break;
+                case 't':
+                        qctl.qc_cmd = LUSTRE_Q_SETINFO;
+                        break;
+                default:
+                        /* the rejected option character is in optopt */
+                        fprintf(stderr, "error: %s: option '-%c' unrecognized\n", argv[0], optopt);
+                        return CMD_HELP;
+                }
+        }
+
+        /* bit mask (1, 2, 3) -> quota type (0, 1, 2) */
+        if (qctl.qc_type)
+                qctl.qc_type--;
+
+        if (qctl.qc_type == UGQUOTA) {
+                fprintf(stderr, "error: user and group quotas can't be set together\n");
+                return CMD_HELP;
+        }
+
+        if (qctl.qc_cmd == LUSTRE_Q_SETQUOTA) {
+                struct if_dqblk *dqb = &qctl.qc_dqblk;
+
+                /* name, four limits and the mount point must remain */
+                if (optind + 6 != argc)
+                        return CMD_HELP;
+
+                rc = name2id(&qctl.qc_id, argv[optind++], qctl.qc_type);
+                if (rc) {
+                        fprintf(stderr, "error: find id for name %s failed: %s\n",
+                                argv[optind - 1], strerror(errno));
+                        return CMD_HELP;
+                }
+
+                ARG2INT(dqb->dqb_bsoftlimit, argv[optind++], "block-softlimit");
+                ARG2INT(dqb->dqb_bhardlimit, argv[optind++], "block-hardlimit");
+                ARG2INT(dqb->dqb_isoftlimit, argv[optind++], "inode-softlimit");
+                ARG2INT(dqb->dqb_ihardlimit, argv[optind++], "inode-hardlimit");
+        } else {
+                struct if_dqinfo *dqi = &qctl.qc_dqinfo;
+
+                /* two grace times and the mount point must remain */
+                if (optind + 3 != argc)
+                        return CMD_HELP;
+
+                ARG2INT(dqi->dqi_bgrace, argv[optind++], "block-grace");
+                ARG2INT(dqi->dqi_igrace, argv[optind++], "inode-grace");
+        }
+
+        mnt = argv[optind];
+
+        rc = llapi_quotactl(mnt, &qctl);
+        if (rc) {
+                if (*obd_type)
+                        fprintf(stderr, "%s %s ", obd_type, obd_uuid);
+                fprintf(stderr, "setquota failed: %s\n", strerror(errno));
+                return rc;
+        }
+
+        return 0;
+}
+
+/* Human-readable label for a quota type: "user", "group" or "unknown". */
+static inline char *type2name(int check_type)
+{
+        if (check_type == USRQUOTA)
+                return "user";
+        else if (check_type == GRPQUOTA)
+                return "group";
+        else
+                return "unknown";
+}
+
+
+/*
+ * Format a duration in seconds (rounded to the nearest minute) into @buf:
+ * "<N>days" from two days up, otherwise "HH:MM".
+ * At most 40 bytes are written, so @buf must be at least that large.
+ */
+static void grace2str(time_t seconds,char *buf)
+{
+        uint minutes, hours, days;
+
+        minutes = (seconds + 30) / 60;
+        hours = minutes / 60;
+        minutes %= 60;
+        days = hours / 24;
+        hours %= 24;
+        /* the values are unsigned, so print them with %u */
+        if (days >= 2)
+                snprintf(buf, 40, "%udays", days);
+        else
+                snprintf(buf, 40, "%02u:%02u", hours + days * 24, minutes);
+}
+
+
+/*
+ * Format the time remaining until the absolute time @seconds into @buf:
+ * empty when @seconds is 0, "none" when it is not after @now, otherwise
+ * the grace2str() rendering of the difference.
+ */
+static void diff2str(time_t seconds, char *buf, time_t now)
+{
+
+        buf[0] = 0;
+        if (!seconds)
+                return;
+        if (seconds <= now) {
+                strcpy(buf, "none");
+                return;
+        }
+        grace2str(seconds - now, buf);
+}
+
+
+/*
+ * Print the reply held in @qctl for mount point @mnt.
+ *
+ * For LUSTRE_Q_GETQUOTA / Q_GETOQUOTA a usage table for @name is printed;
+ * for LUSTRE_Q_GETINFO / Q_GETOINFO the two grace times.  Any other
+ * command prints nothing.
+ *
+ * bover / iover describe the block / inode state:
+ *   0 within limits, 1 over the hard limit, 2 over the soft limit with the
+ *   grace time still in the future, 3 over the soft limit, grace expired.
+ */
+static void print_quota(char *mnt, char *name, struct if_quotactl *qctl)
+{
+        time_t now;
+
+        time(&now);
+
+        if (qctl->qc_cmd == LUSTRE_Q_GETQUOTA || qctl->qc_cmd == Q_GETOQUOTA) {
+                int bover = 0, iover = 0;
+                struct if_dqblk *dqb = &qctl->qc_dqblk;
+
+                if (dqb->dqb_bhardlimit &&
+                    toqb(dqb->dqb_curspace) > dqb->dqb_bhardlimit) {
+                        bover = 1;
+                } else if (dqb->dqb_bsoftlimit &&
+                           toqb(dqb->dqb_curspace) > dqb->dqb_bsoftlimit) {
+                        if (dqb->dqb_btime > now) {
+                                bover = 2;
+                        } else {
+                                bover = 3;
+                        }
+                }
+
+                if (dqb->dqb_ihardlimit &&
+                    dqb->dqb_curinodes > dqb->dqb_ihardlimit) {
+                        iover = 1;
+                } else if (dqb->dqb_isoftlimit &&
+                           dqb->dqb_curinodes > dqb->dqb_isoftlimit) {
+                        /* inode grace is tracked in dqb_itime, not in the
+                         * block timer dqb_btime */
+                        if (dqb->dqb_itime > now) {
+                                iover = 2;
+                        } else {
+                                iover = 3;
+                        }
+                }
+
+                printf("Disk quotas for %s %s (%cid %u):\n",
+                       type2name(qctl->qc_type), name,
+                       *type2name(qctl->qc_type), qctl->qc_id);
+                printf("%15s%8s %7s%8s%8s%8s %7s%8s%8s\n",
+                       "Filesystem",
+                       "blocks", "quota", "limit", "grace",
+                       "files", "quota", "limit", "grace");
+
+#if 0 /* XXX: always print quotas even when no usages */
+                if (dqb->dqb_curspace || dqb->dqb_curinodes)
+#endif
+                {
+                        char numbuf[3][32];
+                        char timebuf[40];
+
+                        /* a long mount point gets a line of its own */
+                        if (strlen(mnt) > 15)
+                                printf("%s\n%15s", mnt, "");
+                        else
+                                printf("%15s", mnt);
+                        /* timebuf is only printed when it was filled in:
+                         * bover > 1 implies bover != 0 */
+                        if (bover)
+                                diff2str(dqb->dqb_btime, timebuf, now);
+                        sprintf(numbuf[0], "%llu", toqb(dqb->dqb_curspace));
+                        sprintf(numbuf[1], "%llu", dqb->dqb_bsoftlimit);
+                        sprintf(numbuf[2], "%llu", dqb->dqb_bhardlimit);
+                        printf(" %7s%c %6s %7s %7s", numbuf[0], bover ? '*' : ' ', numbuf[1],
+                               numbuf[2], bover > 1 ? timebuf : "");
+                        if (iover)
+                                diff2str(dqb->dqb_itime, timebuf, now);
+                        sprintf(numbuf[0], "%llu", dqb->dqb_curinodes);
+                        sprintf(numbuf[1], "%llu", dqb->dqb_isoftlimit);
+                        sprintf(numbuf[2], "%llu", dqb->dqb_ihardlimit);
+                        printf(" %7s%c %6s %7s %7s\n", numbuf[0], iover ? '*' : ' ', numbuf[1],
+                               numbuf[2], iover > 1 ? timebuf : "");
+                }
+        } else if (qctl->qc_cmd == LUSTRE_Q_GETINFO || qctl->qc_cmd == Q_GETOINFO) {
+                char bgtimebuf[40];
+                char igtimebuf[40];
+
+                grace2str(qctl->qc_dqinfo.dqi_bgrace, bgtimebuf);
+                grace2str(qctl->qc_dqinfo.dqi_igrace, igtimebuf);
+                printf("Block grace time: %s; Inode grace time: %s\n", bgtimebuf, igtimebuf);
+        }
+}
+
+/*
+ * "quota" command:
+ *   quota [-o obd_uuid] [-u|-g] [<name>] <mountpoint>
+ *   quota -t [-u|-g] <mountpoint>
+ *
+ * Sends LUSTRE_Q_GETQUOTA, or LUSTRE_Q_GETINFO with -t, through
+ * llapi_quotactl() and prints the reply with print_quota().
+ * Returns 0 on success, CMD_HELP on a usage error, otherwise the
+ * llapi_quotactl() result after printing a diagnostic.
+ */
+static int lfs_quota(int argc, char **argv)
+{
+        int c;
+        char *name = NULL, *mnt;
+        struct if_quotactl qctl;
+        char *obd_type = qctl.obd_type;
+        char *obd_uuid = qctl.obd_uuid.uuid;
+        int rc;
+
+        memset(&qctl, 0, sizeof(qctl));
+        qctl.qc_cmd = LUSTRE_Q_GETQUOTA;
+
+        /* restart option scanning from the beginning of argv */
+        optind = 0;
+        while ((c = getopt(argc, argv, "ugto:")) != -1) {
+                switch (c) {
+                case 'u':
+                        qctl.qc_type |= 0x01;
+                        break;
+                case 'g':
+                        qctl.qc_type |= 0x02;
+                        break;
+                case 't':
+                        qctl.qc_cmd = LUSTRE_Q_GETINFO;
+                        break;
+                case 'o':
+                        /* qctl was zeroed above; copying one byte less than
+                         * the field keeps the uuid NUL-terminated */
+                        strncpy(obd_uuid, optarg, sizeof(qctl.obd_uuid) - 1);
+                        break;
+                default:
+                        /* the rejected option character is in optopt */
+                        fprintf(stderr, "error: %s: option '-%c' unrecognized\n", argv[0], optopt);
+                        return CMD_HELP;
+                }
+        }
+
+        /* bit mask (1, 2, 3) -> quota type (0, 1, 2) */
+        if (qctl.qc_type)
+                qctl.qc_type--;
+
+        if (qctl.qc_type == UGQUOTA) {
+                fprintf(stderr, "error: user or group can't be specified together\n");
+                return CMD_HELP;
+        }
+
+        if (qctl.qc_cmd == LUSTRE_Q_GETQUOTA && optind + 2 == argc) {
+                name = argv[optind++];
+                rc = name2id(&qctl.qc_id, name, qctl.qc_type);
+                if (rc) {
+                        fprintf(stderr, "error: find id for name %s failed: %s\n",
+                                name, strerror(errno));
+                        return CMD_HELP;
+                }
+        } else if (optind + 1 != argc) {
+                return CMD_HELP;
+        }
+
+        mnt = argv[optind];
+
+        rc = llapi_quotactl(mnt, &qctl);
+        if (rc) {
+                if (*obd_type)
+                        fprintf(stderr, "%s %s ", obd_type, obd_uuid);
+                fprintf(stderr, "quota failed: %s\n", strerror(errno));
+                return rc;
+        }
+
+        if (!name) {
+                /* No name given: label the report with the caller's own
+                 * user or group name.
+                 * NOTE(review): qc_id is still 0 on this path, so the ids
+                 * queried and printed are 0 - confirm that is intended. */
+                unsigned int self = qctl.qc_type == USRQUOTA ?
+                                    getuid() : getgid();
+
+                /* never hand a NULL name to printf("%s") */
+                if (id2name(&name, self, qctl.qc_type))
+                        name = "<unknown>";
+        }
+
+        print_quota(mnt, name, &qctl);
+        return 0;
+}
+
 int main(int argc, char **argv)
 {
         int rc;
diff --git a/lustre/utils/liblustreapi.c b/lustre/utils/liblustreapi.c
index 9684127..f345582 100644
---
a/lustre/utils/liblustreapi.c +++ b/lustre/utils/liblustreapi.c @@ -39,6 +39,7 @@ #include #include #include +#include #include #include @@ -165,9 +166,12 @@ struct find_param { int quiet; struct obd_uuid *obduuid; int lumlen; - struct lov_user_md *lum; + struct lov_user_mds_data *lmd; +/* struct lov_user_md *lum;*/ int got_uuids; int obdindex; + int (* process_file)(DIR *dir, char *dname, char *fname, + struct find_param *param); }; /* XXX Max obds per lov currently hardcoded to 1000 in lov/lov_obd.c */ @@ -177,9 +181,9 @@ struct find_param { static int prepare_find(struct find_param *param) { param->lumlen = lov_mds_md_size(MAX_LOV_UUID_COUNT); - if ((param->lum = malloc(param->lumlen)) == NULL) { + if ((param->lmd = malloc(sizeof(lstat_t) + param->lumlen)) == NULL) { err_msg("unable to allocate %d bytes of memory for ioctl", - param->lumlen); + sizeof(lstat_t) + param->lumlen); return ENOMEM; } @@ -193,8 +197,8 @@ static void cleanup_find(struct find_param *param) { if (param->obduuid) free(param->obduuid); - if (param->lum) - free(param->lum); + if (param->lmd) + free(param->lmd); } int llapi_lov_get_uuids(int fd, struct obd_uuid *uuidp, int *ost_count) @@ -349,15 +353,15 @@ void lov_dump_user_lmm_v1(struct lov_user_md_v1 *lum, char *dname, char *fname, void llapi_lov_dump_user_lmm(struct find_param *param, char *dname, char *fname) { - switch(*(__u32 *)param->lum) { /* lum->lmm_magic */ + switch(*(__u32 *)&param->lmd->lmd_lmm) { /* lum->lmm_magic */ case LOV_USER_MAGIC_V1: - lov_dump_user_lmm_v1(param->lum, dname, fname, param->obdindex, + lov_dump_user_lmm_v1(&param->lmd->lmd_lmm, dname, fname, param->obdindex, param->quiet, param->verbose, (param->verbose || !param->obduuid)); break; default: printf("unknown lmm_magic: %#x (expecting %#x)\n", - *(__u32 *)param->lum, LOV_USER_MAGIC_V1); + *(__u32 *)&param->lmd->lmd_lmm, LOV_USER_MAGIC_V1); return; } } @@ -411,14 +415,14 @@ int op_get_file_stripe(char *path, struct lov_user_md *lum) return llapi_file_get_stripe(path,
lum); } -static int process_file(DIR *dir, char *dname, char *fname, +static int find_process_file(DIR *dir, char *dname, char *fname, struct find_param *param) { int rc; - strncpy((char *)param->lum, fname, param->lumlen); + strncpy((char *)&param->lmd->lmd_lmm, fname, param->lumlen); - rc = ioctl(dirfd(dir), IOC_MDC_GETSTRIPE, (void *)param->lum); + rc = ioctl(dirfd(dir), IOC_MDC_GETSTRIPE, (void *)&param->lmd->lmd_lmm); if (rc) { if (errno == ENODATA) { if (!param->obduuid && !param->quiet) @@ -478,8 +482,8 @@ static int process_dir(DIR *dir, char *dname, struct find_param *param) } /* retrieve dir's stripe info */ - strncpy((char *)param->lum, dname, param->lumlen); - rc = ioctl(dirfd(dir), LL_IOC_LOV_GETSTRIPE, (void *)param->lum); + strncpy((char *)&param->lmd->lmd_lmm, dname, param->lumlen); + rc = ioctl(dirfd(dir), LL_IOC_LOV_GETSTRIPE, (void *)&param->lmd->lmd_lmm); if (rc) { if (errno == ENODATA) { if (!param->obduuid && param->verbose) @@ -528,7 +532,7 @@ static int process_dir(DIR *dir, char *dname, struct find_param *param) return rc; break; case DT_REG: - rc = process_file(dir, dname, dirp->d_name, param); + rc = param->process_file(dir, dname, dirp->d_name, param); if (rc) return rc; break; @@ -580,7 +584,7 @@ static int process_path(char *path, struct find_param *param) if (!param->got_uuids) rc = setup_obd_uuids(dir, dname, param); if (rc == 0) - rc = process_file(dir, dname, fname, param); + rc = param->process_file(dir, dname, fname, param); closedir(dir); } } @@ -598,6 +602,7 @@ int llapi_find(char *path, struct obd_uuid *obduuid, int recursive, param.recursive = recursive; param.verbose = verbose; param.quiet = quiet; + param.process_file = find_process_file; if (obduuid) { param.obduuid = malloc(sizeof(*obduuid)); if (param.obduuid == NULL) {
@@ -742,3 +747,165 @@ int llapi_is_lustre_mnttype(char *type)
 {
         return (strcmp(type,"lustre") == 0 || strcmp(type,"lustre_lite") == 0);
 }
+
+/*
+ * Issue LL_IOC_QUOTACHECK with @check_type on the directory @mnt.
+ * Returns the ioctl() result, or -1 if @mnt cannot be opened.  errno as
+ * left by the ioctl() is preserved across closedir() for the caller.
+ */
+int llapi_quotacheck(char *mnt, int check_type)
+{
+        DIR *root;
+        int rc, saved_errno;
+
+        root = opendir(mnt);
+        if (!root) {
+                err_msg("open %s failed", mnt);
+                return -1;
+        }
+
+        rc = ioctl(dirfd(root), LL_IOC_QUOTACHECK, check_type);
+
+        /* callers print strerror(errno); don't let closedir() change it */
+        saved_errno = errno;
+        closedir(root);
+        errno = saved_errno;
+        return rc;
+}
+
+/*
+ * Repeat LL_IOC_POLL_QUOTACHECK on @mnt, passing @qchk, for as long as it
+ * fails with ENODATA.  The pause between attempts starts at 2 seconds and
+ * doubles while it is below 30 (2, 4, 8, 16, 32, 32, ...).
+ * Returns the last ioctl() result, or -1 if @mnt cannot be opened.
+ */
+int llapi_poll_quotacheck(char *mnt, struct if_quotacheck *qchk)
+{
+        DIR *root;
+        int poll_intvl = 2;
+        int rc, saved_errno;
+
+        root = opendir(mnt);
+        if (!root) {
+                err_msg("open %s failed", mnt);
+                return -1;
+        }
+
+        while (1) {
+                rc = ioctl(dirfd(root), LL_IOC_POLL_QUOTACHECK, qchk);
+                if (!rc || errno != ENODATA)
+                        break;
+                sleep(poll_intvl);
+                if (poll_intvl < 30)
+                        poll_intvl *= 2;
+        }
+
+        /* keep the ioctl()'s errno for the caller across closedir() */
+        saved_errno = errno;
+        closedir(root);
+        errno = saved_errno;
+        return rc;
+}
+
+/*
+ * Pass the quota request @qctl to the filesystem mounted at @mnt through
+ * LL_IOC_QUOTACTL.
+ * Returns the ioctl() result, or -1 if @mnt cannot be opened.
+ */
+int llapi_quotactl(char *mnt, struct if_quotactl *qctl)
+{
+        DIR *root;
+        int rc, saved_errno;
+
+        root = opendir(mnt);
+        if (!root) {
+                err_msg("open %s failed", mnt);
+                return -1;
+        }
+
+        rc = ioctl(dirfd(root), LL_IOC_QUOTACTL, qctl);
+
+        /* keep the ioctl()'s errno for the caller across closedir() */
+        saved_errno = errno;
+        closedir(root);
+        errno = saved_errno;
+        return rc;
+}
+
+/*
+ * find_param.process_file callback used by llapi_quotachog().
+ * Calls IOC_MDC_GETFILEINFO for @fname and chowns @dname/@fname to the
+ * st_uid/st_gid read back from the lmd_st member of the ioctl buffer.
+ * Returns 0 on success or when the ioctl fails with ENODATA, non-zero on
+ * any other failure.
+ */
+static int quotachog_process_file(DIR *dir, char *dname, char *fname,
+                                  struct find_param *param)
+{
+        lstat_t *st;
+        char pathname[PATH_MAX + 1] = "";
+        int rc;
+
+        /* the name goes in through the buffer the result is read from */
+        strncpy((char *)param->lmd, fname, param->lumlen);
+
+        rc = ioctl(dirfd(dir), IOC_MDC_GETFILEINFO, (void *)param->lmd);
+        if (rc) {
+                if (errno == ENODATA) {
+                        if (!param->obduuid && !param->quiet)
+                                fprintf(stderr,
+                                        "%s/%s has no stripe info\n",
+                                        dname, fname);
+                        rc = 0;
+                } else if (errno != EISDIR) {
+                        /* read errno before err_msg() can disturb it */
+                        rc = errno;
+                        err_msg("IOC_MDC_GETFILEINFO ioctl failed");
+                }
+                /* NOTE(review): EISDIR falls through with the raw ioctl
+                 * result (non-zero) - confirm that is intended */
+                return rc;
+        }
+
+        st = &param->lmd->lmd_st;
+        /* never chown a truncated, and therefore different, path */
+        if (snprintf(pathname, sizeof(pathname), "%s/%s", dname, fname) >=
+            (int)sizeof(pathname)) {
+                fprintf(stderr, "%s/%s: path too long\n", dname, fname);
+                return ENAMETOOLONG;
+        }
+        rc = syscall(SYS_chown, pathname, st->st_uid, st->st_gid);
+        if (rc)
+                fprintf(stderr, "chown %s (%u,%u) fail: %s\n",
+                        pathname, st->st_uid, st->st_gid, strerror(errno));
+        return rc;
+}
+
+/*
+ * Walk @path recursively and run quotachog_process_file() on the files
+ * visited by process_path().  @flag is currently unused.
+ * Returns 0 on success, the prepare_find() error, or the process_path()
+ * result.
+ */
+int llapi_quotachog(char *path, int flag)
+{
+        struct find_param param;
+        int ret;
+
+        memset(&param, 0, sizeof(param));
+        param.recursive = 1;
+        param.verbose = 0;
+        param.quiet = 1;
+        param.process_file = quotachog_process_file;
+
+        ret = prepare_find(&param);
+        if (ret)
+                goto out;
+
+        /* report traversal/chown failures instead of always returning 0 */
+        ret = process_path(path, &param);
+out:
+        cleanup_find(&param);
+        return ret;
+}
+
diff --git a/lustre/utils/wirecheck.c b/lustre/utils/wirecheck.c index cc53e12..a301783 100644 --- a/lustre/utils/wirecheck.c +++ b/lustre/utils/wirecheck.c @@ -201,6 +201,38 @@ check_obd_ioobj(void) } void +check_obd_quotactl(void) +{ + BLANK_LINE(); + CHECK_STRUCT(obd_quotactl); + CHECK_MEMBER(obd_quotactl, qc_cmd); + CHECK_MEMBER(obd_quotactl, qc_type); + CHECK_MEMBER(obd_quotactl, qc_id); + CHECK_MEMBER(obd_quotactl, qc_stat); + CHECK_MEMBER(obd_quotactl, qc_dqinfo); + CHECK_MEMBER(obd_quotactl, qc_dqblk); + + BLANK_LINE(); + CHECK_STRUCT(obd_dqinfo); + CHECK_MEMBER(obd_dqinfo, dqi_bgrace); + CHECK_MEMBER(obd_dqinfo, dqi_igrace); + CHECK_MEMBER(obd_dqinfo, dqi_flags); + CHECK_MEMBER(obd_dqinfo, dqi_valid); + + BLANK_LINE(); + CHECK_STRUCT(obd_dqblk); + CHECK_MEMBER(obd_dqblk, dqb_bhardlimit); + CHECK_MEMBER(obd_dqblk, dqb_bsoftlimit); + CHECK_MEMBER(obd_dqblk, dqb_curspace); + CHECK_MEMBER(obd_dqblk, dqb_ihardlimit); + CHECK_MEMBER(obd_dqblk, dqb_isoftlimit); + CHECK_MEMBER(obd_dqblk, dqb_curinodes); + CHECK_MEMBER(obd_dqblk, dqb_btime); + CHECK_MEMBER(obd_dqblk, dqb_itime); + CHECK_MEMBER(obd_dqblk, dqb_valid); +} + +void check_niobuf_remote(void) { BLANK_LINE(); @@ -522,6 +554,7 @@ check_llog_logid(void) CHECK_VALUE(OST_SZ_REC); CHECK_VALUE(OST_RAID1_REC); CHECK_VALUE(MDS_UNLINK_REC); + CHECK_VALUE(MDS_SETATTR_REC); CHECK_VALUE(OBD_CFG_REC); CHECK_VALUE(PTL_CFG_REC); CHECK_VALUE(LLOG_GEN_REC); @@ -690,6 +723,17 @@ check_llogd_conn_body(void) } void +check_qunit_data(void) +{ + BLANK_LINE(); + CHECK_STRUCT(qunit_data); + CHECK_MEMBER(qunit_data, qd_id); + CHECK_MEMBER(qunit_data, qd_type); + CHECK_MEMBER(qunit_data, qd_count); + CHECK_MEMBER(qunit_data, qd_isblk); +} + +void system_string (char *cmdline, char *str, int len) { int fds[2]; @@ -791,6 +835,8 @@ main(int argc, char **argv) CHECK_VALUE(OST_SAN_READ); CHECK_VALUE(OST_SAN_WRITE); CHECK_VALUE(OST_SYNC); + CHECK_VALUE(OST_QUOTACHECK);
+ CHECK_VALUE(OST_QUOTACTL); CHECK_VALUE(OST_LAST_OPC); CHECK_DEFINE(OBD_OBJECT_EOF); @@ -811,6 +857,8 @@ main(int argc, char **argv) CHECK_VALUE(MDS_SYNC); CHECK_VALUE(MDS_DONE_WRITING); CHECK_VALUE(MDS_SET_INFO); + CHECK_VALUE(MDS_QUOTACHECK); + CHECK_VALUE(MDS_QUOTACTL); CHECK_VALUE(MDS_LAST_OPC); CHECK_VALUE(REINT_SETATTR); @@ -861,8 +909,12 @@ main(int argc, char **argv) CHECK_VALUE(OBD_PING); CHECK_VALUE(OBD_LOG_CANCEL); + CHECK_VALUE(OBD_QC_CALLBACK); CHECK_VALUE(OBD_LAST_OPC); + CHECK_VALUE(QUOTA_DQACQ); + CHECK_VALUE(QUOTA_DQREL); + COMMENT("Sizes and Offsets"); BLANK_LINE(); check_lustre_handle(); @@ -871,6 +923,7 @@ main(int argc, char **argv) check_lov_mds_md_v1(); check_obd_statfs(); check_obd_ioobj(); + check_obd_quotactl(); check_niobuf_remote(); check_ost_body(); check_ll_fid(); @@ -909,6 +962,7 @@ main(int argc, char **argv) check_llog_cookie(); check_llogd_body(); check_llogd_conn_body(); + check_qunit_data(); printf("}\n\n"); diff --git a/lustre/utils/wiretest.c b/lustre/utils/wiretest.c index b8ec996..4c20368 100644 --- a/lustre/utils/wiretest.c +++ b/lustre/utils/wiretest.c @@ -25,8 +25,8 @@ int main() void lustre_assert_wire_constants(void) { /* Wire protocol assertions generated by 'wirecheck' - * running on Linux milano 2.6.5-7.141_87k.3-b1_4_bgl.200503212049bigsmp #1 SMP Mon Mar 21 20 - * with gcc version 3.3.4 20040817 (Red Hat Linux 3.3.4-2) */ + * running on Linux localhost.localdomain 2.4.20-8 #3 ËÄ 9ÔÂ 23 15:12:02 CST 2004 i686 i686 i + * with gcc version 3.2.2 20030222 (Red Hat Linux 3.2.2-5) */ /* Constants... 
*/ @@ -86,7 +86,11 @@ void lustre_assert_wire_constants(void) (long long)OST_SAN_WRITE); LASSERTF(OST_SYNC == 16, " found %lld\n", (long long)OST_SYNC); - LASSERTF(OST_LAST_OPC == 18, " found %lld\n", + LASSERTF(OST_QUOTACHECK == 18, " found %lld\n", + (long long)OST_QUOTACHECK); + LASSERTF(OST_QUOTACTL == 19, " found %lld\n", + (long long)OST_QUOTACTL); + LASSERTF(OST_LAST_OPC == 20, " found %lld\n", (long long)OST_LAST_OPC); LASSERTF(OBD_OBJECT_EOF == 0xffffffffffffffffULL," found %lld\n", (long long)OBD_OBJECT_EOF); @@ -120,7 +124,11 @@ void lustre_assert_wire_constants(void) (long long)MDS_DONE_WRITING); LASSERTF(MDS_SET_INFO == 46, " found %lld\n", (long long)MDS_SET_INFO); - LASSERTF(MDS_LAST_OPC == 47, " found %lld\n", + LASSERTF(MDS_QUOTACHECK == 47, " found %lld\n", + (long long)MDS_QUOTACHECK); + LASSERTF(MDS_QUOTACTL == 48, " found %lld\n", + (long long)MDS_QUOTACTL); + LASSERTF(MDS_LAST_OPC == 49, " found %lld\n", (long long)MDS_LAST_OPC); LASSERTF(REINT_SETATTR == 1, " found %lld\n", (long long)REINT_SETATTR); @@ -202,8 +210,14 @@ void lustre_assert_wire_constants(void) (long long)OBD_PING); LASSERTF(OBD_LOG_CANCEL == 401, " found %lld\n", (long long)OBD_LOG_CANCEL); - LASSERTF(OBD_LAST_OPC == 402, " found %lld\n", + LASSERTF(OBD_QC_CALLBACK == 402, " found %lld\n", + (long long)OBD_QC_CALLBACK); + LASSERTF(OBD_LAST_OPC == 403, " found %lld\n", (long long)OBD_LAST_OPC); + LASSERTF(QUOTA_DQACQ == 601, " found %lld\n", + (long long)QUOTA_DQACQ); + LASSERTF(QUOTA_DQREL == 602, " found %lld\n", + (long long)QUOTA_DQREL); /* Sizes and Offsets */ @@ -517,6 +531,94 @@ void lustre_assert_wire_constants(void) LASSERTF((int)sizeof(((struct obd_ioobj *)0)->ioo_bufcnt) == 4, " found %lld\n", (long long)(int)sizeof(((struct obd_ioobj *)0)->ioo_bufcnt)); + /* Checks for struct obd_quotactl */ + LASSERTF((int)sizeof(struct obd_quotactl) == 112, " found %lld\n", + (long long)(int)sizeof(struct obd_quotactl)); + LASSERTF((int)offsetof(struct obd_quotactl, qc_cmd) == 0, 
" found %lld\n", + (long long)(int)offsetof(struct obd_quotactl, qc_cmd)); + LASSERTF((int)sizeof(((struct obd_quotactl *)0)->qc_cmd) == 4, " found %lld\n", + (long long)(int)sizeof(((struct obd_quotactl *)0)->qc_cmd)); + LASSERTF((int)offsetof(struct obd_quotactl, qc_type) == 4, " found %lld\n", + (long long)(int)offsetof(struct obd_quotactl, qc_type)); + LASSERTF((int)sizeof(((struct obd_quotactl *)0)->qc_type) == 4, " found %lld\n", + (long long)(int)sizeof(((struct obd_quotactl *)0)->qc_type)); + LASSERTF((int)offsetof(struct obd_quotactl, qc_id) == 8, " found %lld\n", + (long long)(int)offsetof(struct obd_quotactl, qc_id)); + LASSERTF((int)sizeof(((struct obd_quotactl *)0)->qc_id) == 4, " found %lld\n", + (long long)(int)sizeof(((struct obd_quotactl *)0)->qc_id)); + LASSERTF((int)offsetof(struct obd_quotactl, qc_stat) == 12, " found %lld\n", + (long long)(int)offsetof(struct obd_quotactl, qc_stat)); + LASSERTF((int)sizeof(((struct obd_quotactl *)0)->qc_stat) == 4, " found %lld\n", + (long long)(int)sizeof(((struct obd_quotactl *)0)->qc_stat)); + LASSERTF((int)offsetof(struct obd_quotactl, qc_dqinfo) == 16, " found %lld\n", + (long long)(int)offsetof(struct obd_quotactl, qc_dqinfo)); + LASSERTF((int)sizeof(((struct obd_quotactl *)0)->qc_dqinfo) == 24, " found %lld\n", + (long long)(int)sizeof(((struct obd_quotactl *)0)->qc_dqinfo)); + LASSERTF((int)offsetof(struct obd_quotactl, qc_dqblk) == 40, " found %lld\n", + (long long)(int)offsetof(struct obd_quotactl, qc_dqblk)); + LASSERTF((int)sizeof(((struct obd_quotactl *)0)->qc_dqblk) == 72, " found %lld\n", + (long long)(int)sizeof(((struct obd_quotactl *)0)->qc_dqblk)); + + /* Checks for struct obd_dqinfo */ + LASSERTF((int)sizeof(struct obd_dqinfo) == 24, " found %lld\n", + (long long)(int)sizeof(struct obd_dqinfo)); + LASSERTF((int)offsetof(struct obd_dqinfo, dqi_bgrace) == 0, " found %lld\n", + (long long)(int)offsetof(struct obd_dqinfo, dqi_bgrace)); + LASSERTF((int)sizeof(((struct obd_dqinfo *)0)->dqi_bgrace) 
== 8, " found %lld\n", + (long long)(int)sizeof(((struct obd_dqinfo *)0)->dqi_bgrace)); + LASSERTF((int)offsetof(struct obd_dqinfo, dqi_igrace) == 8, " found %lld\n", + (long long)(int)offsetof(struct obd_dqinfo, dqi_igrace)); + LASSERTF((int)sizeof(((struct obd_dqinfo *)0)->dqi_igrace) == 8, " found %lld\n", + (long long)(int)sizeof(((struct obd_dqinfo *)0)->dqi_igrace)); + LASSERTF((int)offsetof(struct obd_dqinfo, dqi_flags) == 16, " found %lld\n", + (long long)(int)offsetof(struct obd_dqinfo, dqi_flags)); + LASSERTF((int)sizeof(((struct obd_dqinfo *)0)->dqi_flags) == 4, " found %lld\n", + (long long)(int)sizeof(((struct obd_dqinfo *)0)->dqi_flags)); + LASSERTF((int)offsetof(struct obd_dqinfo, dqi_valid) == 20, " found %lld\n", + (long long)(int)offsetof(struct obd_dqinfo, dqi_valid)); + LASSERTF((int)sizeof(((struct obd_dqinfo *)0)->dqi_valid) == 4, " found %lld\n", + (long long)(int)sizeof(((struct obd_dqinfo *)0)->dqi_valid)); + + /* Checks for struct obd_dqblk */ + LASSERTF((int)sizeof(struct obd_dqblk) == 72, " found %lld\n", + (long long)(int)sizeof(struct obd_dqblk)); + LASSERTF((int)offsetof(struct obd_dqblk, dqb_bhardlimit) == 0, " found %lld\n", + (long long)(int)offsetof(struct obd_dqblk, dqb_bhardlimit)); + LASSERTF((int)sizeof(((struct obd_dqblk *)0)->dqb_bhardlimit) == 8, " found %lld\n", + (long long)(int)sizeof(((struct obd_dqblk *)0)->dqb_bhardlimit)); + LASSERTF((int)offsetof(struct obd_dqblk, dqb_bsoftlimit) == 8, " found %lld\n", + (long long)(int)offsetof(struct obd_dqblk, dqb_bsoftlimit)); + LASSERTF((int)sizeof(((struct obd_dqblk *)0)->dqb_bsoftlimit) == 8, " found %lld\n", + (long long)(int)sizeof(((struct obd_dqblk *)0)->dqb_bsoftlimit)); + LASSERTF((int)offsetof(struct obd_dqblk, dqb_curspace) == 16, " found %lld\n", + (long long)(int)offsetof(struct obd_dqblk, dqb_curspace)); + LASSERTF((int)sizeof(((struct obd_dqblk *)0)->dqb_curspace) == 8, " found %lld\n", + (long long)(int)sizeof(((struct obd_dqblk *)0)->dqb_curspace)); + 
LASSERTF((int)offsetof(struct obd_dqblk, dqb_ihardlimit) == 24, " found %lld\n", + (long long)(int)offsetof(struct obd_dqblk, dqb_ihardlimit)); + LASSERTF((int)sizeof(((struct obd_dqblk *)0)->dqb_ihardlimit) == 8, " found %lld\n", + (long long)(int)sizeof(((struct obd_dqblk *)0)->dqb_ihardlimit)); + LASSERTF((int)offsetof(struct obd_dqblk, dqb_isoftlimit) == 32, " found %lld\n", + (long long)(int)offsetof(struct obd_dqblk, dqb_isoftlimit)); + LASSERTF((int)sizeof(((struct obd_dqblk *)0)->dqb_isoftlimit) == 8, " found %lld\n", + (long long)(int)sizeof(((struct obd_dqblk *)0)->dqb_isoftlimit)); + LASSERTF((int)offsetof(struct obd_dqblk, dqb_curinodes) == 40, " found %lld\n", + (long long)(int)offsetof(struct obd_dqblk, dqb_curinodes)); + LASSERTF((int)sizeof(((struct obd_dqblk *)0)->dqb_curinodes) == 8, " found %lld\n", + (long long)(int)sizeof(((struct obd_dqblk *)0)->dqb_curinodes)); + LASSERTF((int)offsetof(struct obd_dqblk, dqb_btime) == 48, " found %lld\n", + (long long)(int)offsetof(struct obd_dqblk, dqb_btime)); + LASSERTF((int)sizeof(((struct obd_dqblk *)0)->dqb_btime) == 8, " found %lld\n", + (long long)(int)sizeof(((struct obd_dqblk *)0)->dqb_btime)); + LASSERTF((int)offsetof(struct obd_dqblk, dqb_itime) == 56, " found %lld\n", + (long long)(int)offsetof(struct obd_dqblk, dqb_itime)); + LASSERTF((int)sizeof(((struct obd_dqblk *)0)->dqb_itime) == 8, " found %lld\n", + (long long)(int)sizeof(((struct obd_dqblk *)0)->dqb_itime)); + LASSERTF((int)offsetof(struct obd_dqblk, dqb_valid) == 64, " found %lld\n", + (long long)(int)offsetof(struct obd_dqblk, dqb_valid)); + LASSERTF((int)sizeof(((struct obd_dqblk *)0)->dqb_valid) == 4, " found %lld\n", + (long long)(int)sizeof(((struct obd_dqblk *)0)->dqb_valid)); + /* Checks for struct niobuf_remote */ LASSERTF((int)sizeof(struct niobuf_remote) == 16, " found %lld\n", (long long)(int)sizeof(struct niobuf_remote)); @@ -1188,6 +1290,8 @@ void lustre_assert_wire_constants(void) (long long)OST_RAID1_REC); 
LASSERTF(MDS_UNLINK_REC == 274801668, " found %lld\n", (long long)MDS_UNLINK_REC); + LASSERTF(MDS_SETATTR_REC == 274801665, " found %lld\n", + (long long)MDS_SETATTR_REC); LASSERTF(OBD_CFG_REC == 274857984, " found %lld\n", (long long)OBD_CFG_REC); LASSERTF(PTL_CFG_REC == 274923520, " found %lld\n", @@ -1484,5 +1588,25 @@ void lustre_assert_wire_constants(void) (long long)(int)offsetof(struct llogd_conn_body, lgdc_ctxt_idx)); LASSERTF((int)sizeof(((struct llogd_conn_body *)0)->lgdc_ctxt_idx) == 4, " found %lld\n", (long long)(int)sizeof(((struct llogd_conn_body *)0)->lgdc_ctxt_idx)); + + /* Checks for struct qunit_data */ + LASSERTF((int)sizeof(struct qunit_data) == 16, " found %lld\n", + (long long)(int)sizeof(struct qunit_data)); + LASSERTF((int)offsetof(struct qunit_data, qd_id) == 0, " found %lld\n", + (long long)(int)offsetof(struct qunit_data, qd_id)); + LASSERTF((int)sizeof(((struct qunit_data *)0)->qd_id) == 4, " found %lld\n", + (long long)(int)sizeof(((struct qunit_data *)0)->qd_id)); + LASSERTF((int)offsetof(struct qunit_data, qd_type) == 4, " found %lld\n", + (long long)(int)offsetof(struct qunit_data, qd_type)); + LASSERTF((int)sizeof(((struct qunit_data *)0)->qd_type) == 4, " found %lld\n", + (long long)(int)sizeof(((struct qunit_data *)0)->qd_type)); + LASSERTF((int)offsetof(struct qunit_data, qd_count) == 8, " found %lld\n", + (long long)(int)offsetof(struct qunit_data, qd_count)); + LASSERTF((int)sizeof(((struct qunit_data *)0)->qd_count) == 4, " found %lld\n", + (long long)(int)sizeof(((struct qunit_data *)0)->qd_count)); + LASSERTF((int)offsetof(struct qunit_data, qd_isblk) == 12, " found %lld\n", + (long long)(int)offsetof(struct qunit_data, qd_isblk)); + LASSERTF((int)sizeof(((struct qunit_data *)0)->qd_isblk) == 4, " found %lld\n", + (long long)(int)sizeof(((struct qunit_data *)0)->qd_isblk)); }